--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Search_QueryParser */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
+
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
+{
+ /** State Machine states */
+ const ST_START = 0;
+ const ST_LITERAL = 1;
+ const ST_NOT_OPERATOR = 2;
+ const ST_AND_OPERATOR = 3;
+ const ST_OR_OPERATOR = 4;
+
+ /** Input symbols */
+ const IN_LITERAL = 0;
+ const IN_NOT_OPERATOR = 1;
+ const IN_AND_OPERATOR = 2;
+ const IN_OR_OPERATOR = 3;
+
+
+ /**
+ * NOT operator signal
+ *
+ * @var boolean
+ */
+ private $_negativeLiteral = false;
+
+ /**
+ * Current literal
+ *
+ * @var mixed
+ */
+ private $_literal;
+
+
+ /**
+ * Set of boolean query conjunctions
+ *
+ * Each conjunction is an array of conjunction elements
+ * Each conjunction element is represented by a two-element array:
+ * array(<literal>, <is_positive>)
+ * where <is_positive> is false if the literal was preceded by a NOT operator
+ * and true otherwise.
+ *
+ * So, it has a structure:
+ * array( array( array(<literal>, <is_positive>), // first literal of first conjunction
+ * array(<literal>, <is_positive>), // second literal of first conjunction
+ * ...
+ * array(<literal>, <is_positive>)
+ * ), // end of first conjunction
+ * array( array(<literal>, <is_positive>), // first literal of second conjunction
+ * array(<literal>, <is_positive>), // second literal of second conjunction
+ * ...
+ * array(<literal>, <is_positive>)
+ * ), // end of second conjunction
+ * ...
+ * ) // end of structure
+ *
+ * @var array
+ private $_conjunctions = array();
+
+ /**
+ * Current conjuction
+ *
+ * @var array
+ */
+ private $_currentConjunction = array();
+
+
+ /**
+ * Object constructor
+ *
+ * Registers the five states and the four input symbols with the parent FSM,
+ * then installs the transition rules and the state entry actions.
+ *
+ * Only the transitions listed in addRules() below are registered; for example
+ * there is no rule for an AND or OR operator as the very first input.
+ * How the parent FSM reacts to an unregistered transition is defined outside
+ * this class.
+ *
+ * The two "empty operator" transition actions are attached to the transitions
+ * LITERAL -> LITERAL and LITERAL -> NOT, i.e. to the cases where no AND/OR
+ * operator was given between two operands. Entry actions are attached to
+ * the ST_NOT_OPERATOR, ST_OR_OPERATOR and ST_LITERAL states.
+ */
+ public function __construct()
+ {
+ parent::__construct( array(self::ST_START,
+ self::ST_LITERAL,
+ self::ST_NOT_OPERATOR,
+ self::ST_AND_OPERATOR,
+ self::ST_OR_OPERATOR),
+ array(self::IN_LITERAL,
+ self::IN_NOT_OPERATOR,
+ self::IN_AND_OPERATOR,
+ self::IN_OR_OPERATOR));
+
+ $emptyOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
+ $emptyNotOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');
+
+ $this->addRules(array( array(self::ST_START, self::IN_LITERAL, self::ST_LITERAL),
+ array(self::ST_START, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
+
+ array(self::ST_LITERAL, self::IN_AND_OPERATOR, self::ST_AND_OPERATOR),
+ array(self::ST_LITERAL, self::IN_OR_OPERATOR, self::ST_OR_OPERATOR),
+ array(self::ST_LITERAL, self::IN_LITERAL, self::ST_LITERAL, $emptyOperatorAction),
+ array(self::ST_LITERAL, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR, $emptyNotOperatorAction),
+
+ array(self::ST_NOT_OPERATOR, self::IN_LITERAL, self::ST_LITERAL),
+
+ array(self::ST_AND_OPERATOR, self::IN_LITERAL, self::ST_LITERAL),
+ array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
+
+ array(self::ST_OR_OPERATOR, self::IN_LITERAL, self::ST_LITERAL),
+ array(self::ST_OR_OPERATOR, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
+ ));
+
+ $notOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'notOperatorAction');
+ $orOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'orOperatorAction');
+ $literalAction = new Zend_Search_Lucene_FSMAction($this, 'literalAction');
+
+
+ $this->addEntryAction(self::ST_NOT_OPERATOR, $notOperatorAction);
+ $this->addEntryAction(self::ST_OR_OPERATOR, $orOperatorAction);
+ $this->addEntryAction(self::ST_LITERAL, $literalAction);
+ }
+
+
+ /**
+ * Process next operator.
+ *
+ * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
+ *
+ * The operator is passed to the state machine unchanged as the next input symbol.
+ *
+ * @param integer $operator One of the IN_*_OPERATOR class constants
+ */
+ public function processOperator($operator)
+ {
+ $this->process($operator);
+ }
+
+ /**
+ * Process expression literal.
+ *
+ * The literal is remembered as the current literal and an IN_LITERAL
+ * input symbol is fed to the state machine.
+ *
+ * @param mixed $literal
+ */
+ public function processLiteral($literal)
+ {
+ $this->_literal = $literal;
+
+ $this->process(self::IN_LITERAL);
+ }
+
+ /**
+  * Close the expression and hand back the collected conjunctions.
+  *
+  * The conjunction currently being built is appended to the result set
+  * before it is returned.
+  *
+  * The result is a list of conjunctions. Every conjunction is a list of
+  * two-element arrays:
+  *     array(<literal>, <is_positive>)
+  * where <is_positive> is false for a literal that was preceded by a NOT
+  * operator and true otherwise:
+  *
+  *     array( array( array(<literal>, <is_positive>),   // first conjunction
+  *                   array(<literal>, <is_positive>),
+  *                   ...
+  *                 ),
+  *            array( array(<literal>, <is_positive>),   // second conjunction
+  *                   ...
+  *                 ),
+  *            ...
+  *          )
+  *
+  * @return array
+  * @throws Zend_Search_Lucene_Exception if the machine is not in the ST_LITERAL state
+  */
+ public function finishExpression()
+ {
+     if ($this->getState() == self::ST_LITERAL) {
+         // Flush the conjunction that is still open
+         $this->_conjunctions[] = $this->_currentConjunction;
+
+         return $this->_conjunctions;
+     }
+
+     throw new Zend_Search_Lucene_Exception('Literal expected.');
+ }
+
+
+
+ /*********************************************************************
+ * Actions implementation
+ *********************************************************************/
+
+ /**
+  * Handles two adjacent literals with no operator between them.
+  *
+  * The omitted operator is taken from the query parser's default operator:
+  * unless that default is AND, the current conjunction is closed as for an
+  * explicit OR. The literal itself is then processed.
+  */
+ public function emptyOperatorAction()
+ {
+     $defaultOperator = Zend_Search_Lucene_Search_QueryParser::getDefaultOperator();
+
+     if ($defaultOperator != Zend_Search_Lucene_Search_QueryParser::B_AND) {
+         $this->orOperatorAction();
+     }
+
+     $this->literalAction();
+ }
+
+ /**
+  * Handles a NOT operator that directly follows a literal
+  * (no AND/OR operator between them).
+  *
+  * Unless the query parser's default operator is AND, the current
+  * conjunction is closed as for an explicit OR. The NOT operator is then
+  * processed.
+  */
+ public function emptyNotOperatorAction()
+ {
+     $defaultOperator = Zend_Search_Lucene_Search_QueryParser::getDefaultOperator();
+
+     if ($defaultOperator != Zend_Search_Lucene_Search_QueryParser::B_AND) {
+         $this->orOperatorAction();
+     }
+
+     $this->notOperatorAction();
+ }
+
+
+ /**
+ * NOT operator processing
+ *
+ * Raises the negative signal; literalAction() consumes it for the next
+ * literal and switches it off again.
+ */
+ public function notOperatorAction()
+ {
+ $this->_negativeLiteral = true;
+ }
+
+ /**
+ * OR operator processing
+ * Close current conjunction
+ *
+ * The current conjunction is appended to the set of conjunctions and a new,
+ * empty conjunction is started.
+ */
+ public function orOperatorAction()
+ {
+ $this->_conjunctions[] = $this->_currentConjunction;
+ $this->_currentConjunction = array();
+ }
+
+ /**
+  * Literal processing
+  *
+  * Appends array(<literal>, <is_positive>) to the current conjunction, where
+  * <is_positive> is the inverse of the negative signal, and then clears
+  * the negative signal.
+  */
+ public function literalAction()
+ {
+     // Consume the NOT signal so it applies to this literal only
+     $isNegated = $this->_negativeLiteral;
+     $this->_negativeLiteral = false;
+
+     $this->_currentConjunction[] = array($this->_literal, !$isNegated);
+ }
+}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
+/** Zend_Search_Lucene_Document_Html */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
+
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Query
*
* @var float
*/
- private $_boost = 1.0;
+ private $_boost = 1;
/**
* Query weight
*
* @var Zend_Search_Lucene_Search_Weight
*/
- protected $_weight;
+ protected $_weight = null;
+
+ /**
+ * Current highlight color
+ *
+ * @var integer
+ */
+ private $_currentColorIndex = 0;
+
+ /**
+ * List of colors for text highlighting
+ *
+ * @var array
+ */
+ private $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
+ '#ff8888', '#88ff88', '#8888ff',
+ '#88dddd', '#dd88dd', '#dddd88',
+ '#aaddff', '#aaffdd', '#ddaaff', '#ddffaa', '#ffaadd', '#ffddaa');
/**
* Score specified document
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
- abstract public function score($docId, $reader);
+ abstract public function score($docId, Zend_Search_Lucene_Interface $reader);
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations)
+ *
+ * @return array
+ */
+ abstract public function matchedDocs();
+
+ /**
+ * Execute query in context of index reader
+ * It also initializes necessary internal structures
+ *
+ * Query specific implementation
+ *
+ * @param Zend_Search_Lucene_Interface $reader
+ */
+ abstract public function execute(Zend_Search_Lucene_Interface $reader);
/**
* Constructs an appropriate Weight implementation for this query.
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
- abstract protected function _createWeight($reader);
+ abstract public function createWeight(Zend_Search_Lucene_Interface $reader);
/**
- * Constructs an initializes a Weight for a query.
+ * Constructs and initializes a Weight for a _top-level_query_.
+ *
+ * If a weight is already set, it is returned as is. Otherwise a weight is
+ * created, normalized with the query norm computed by the reader's
+ * similarity from the sum of squared weights, and returned.
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
+ * @return Zend_Search_Lucene_Search_Weight
*/
- protected function _initWeight($reader)
+ protected function _initWeight(Zend_Search_Lucene_Interface $reader)
{
- $this->_weight = $this->_createWeight($reader);
+ // Check, that it's a top-level query and query weight is not initialized yet.
+ if ($this->_weight !== null) {
+ return $this->_weight;
+ }
+
+ // Store the returned weight explicitly instead of relying on createWeight()
+ // to assign $this->_weight as a side effect.
+ $this->_weight = $this->createWeight($reader);
$sum = $this->_weight->sumOfSquaredWeights();
$queryNorm = $reader->getSimilarity()->queryNorm($sum);
$this->_weight->normalize($queryNorm);
+
+ return $this->_weight;
}
-}
\ No newline at end of file
+ /**
+ * Re-write query into primitive queries in the context of specified index
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ abstract public function rewrite(Zend_Search_Lucene_Interface $index);
+
+ /**
+ * Optimize query in the context of specified index
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ abstract public function optimize(Zend_Search_Lucene_Interface $index);
+
+ /**
+ * Reset query, so it can be reused within other queries or
+ * with other indexes
+ *
+ * Clears the query weight.
+ */
+ public function reset()
+ {
+ $this->_weight = null;
+ }
+
+
+ /**
+ * Print a query
+ *
+ * @return string
+ */
+ abstract public function __toString();
+
+ /**
+ * Return query terms
+ *
+ * @return array
+ */
+ abstract public function getQueryTerms();
+
+ /**
+  * Return the highlight color at $colorIndex and advance the index.
+  *
+  * The index is moved to the next color and wraps around to the start of
+  * the color list after the last entry.
+  *
+  * @param integer &$colorIndex  Position in the color list; updated in place
+  * @return string
+  */
+ protected function _getHighlightColor(&$colorIndex)
+ {
+     $picked = $this->_highlightColors[$colorIndex];
+
+     $colorIndex = ($colorIndex + 1) % count($this->_highlightColors);
+
+     return $picked;
+ }
+
+ /**
+ * Highlight query terms
+ *
+ * @param integer &$colorIndex
+ * @param Zend_Search_Lucene_Document_Html $doc
+ */
+ abstract public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex);
+
+ /**
+  * Highlight matches in $inputHTML
+  *
+  * Loads the HTML into a Zend_Search_Lucene_Document_Html, lets the query
+  * highlight its terms in it starting with color index 0, and returns the
+  * resulting HTML.
+  *
+  * @param string $inputHTML
+  * @return string
+  */
+ public function highlightMatches($inputHTML)
+ {
+     $htmlDocument = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
+
+     $nextColor = 0;
+     $this->highlightMatchesDOM($htmlDocument, $nextColor);
+
+     return $htmlDocument->getHTML();
+ }
+}
+
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Boolean.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
+{
+
+ /**
+ * Subqueries
+ * Array of Zend_Search_Lucene_Query
+ *
+ * @var array
+ */
+ private $_subqueries = array();
+
+ /**
+ * Subqueries signs.
+ * If true then subquery is required.
+ * If false then subquery is prohibited.
+ * If null then subquery is neither prohibited, nor required
+ *
+ * If array is null then all subqueries are required
+ *
+ * @var array
+ */
+ private $_signs = array();
+
+ /**
+ * Result vector.
+ *
+ * @var array
+ */
+ private $_resVector = null;
+
+ /**
+ * A score factor based on the fraction of all query subqueries
+ * that a document contains.
+ * float for conjunction queries
+ * array of float for non conjunction queries
+ *
+ * @var mixed
+ */
+ private $_coord = null;
+
+
+ /**
+  * Class constructor. Create a new Boolean query object.
+  *
+  * When $subqueries is not an array the object is left with its defaults.
+  * Otherwise the subqueries are stored, and $signs is stored only if it is
+  * an array containing at least one sign other than TRUE; in every other
+  * case the signs are set to null, which means "all subqueries are required".
+  * This differs from addSubquery() behavior, but should never be used.
+  *
+  * @param array $subqueries Array of Zend_Search_Lucene_Search_Query objects
+  * @param array $signs      Array of signs. Sign is boolean|null.
+  * @return void
+  */
+ public function __construct($subqueries = null, $signs = null)
+ {
+     if (!is_array($subqueries)) {
+         return;
+     }
+
+     $this->_subqueries = $subqueries;
+     $this->_signs      = null;
+
+     if (!is_array($signs)) {
+         return;
+     }
+
+     // Keep the signs only if at least one subquery is not required
+     foreach ($signs as $candidate) {
+         if ($candidate !== true) {
+             $this->_signs = $signs;
+             return;
+         }
+     }
+ }
+
+
+ /**
+  * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
+  *
+  * The sign is specified as:
+  *     TRUE  - subquery is required
+  *     FALSE - subquery is prohibited
+  *     NULL  - subquery is neither prohibited, nor required
+  *
+  * While the signs list is null (all subqueries required) a required
+  * subquery is added without touching it. The first non-required subquery
+  * turns the list into an explicit array, back-filled with TRUE for every
+  * subquery added so far.
+  *
+  * @param  Zend_Search_Lucene_Search_Query $subquery
+  * @param  boolean|null $sign
+  * @return void
+  */
+ public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
+     $allRequiredSoFar = ($this->_signs === null);
+
+     if (!($sign === true && $allRequiredSoFar)) {
+         if ($allRequiredSoFar) {
+             // Materialize the implicit "required" sign of every earlier subquery
+             for ($i = 0, $known = count($this->_subqueries); $i < $known; $i++) {
+                 $this->_signs[] = true;
+             }
+         }
+
+         $this->_signs[] = $sign;
+     }
+
+     $this->_subqueries[] = $subquery;
+ }
+
+ /**
+  * Re-write queries into primitive queries
+  *
+  * Builds a new Boolean query with the same boost whose subqueries are the
+  * rewritten subqueries of this one, keeping each subquery's sign
+  * (TRUE for all of them when the signs list is null).
+  *
+  * @param Zend_Search_Lucene_Interface $index
+  * @return Zend_Search_Lucene_Search_Query
+  */
+ public function rewrite(Zend_Search_Lucene_Interface $index)
+ {
+     $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
+     $rewritten->setBoost($this->getBoost());
+
+     foreach ($this->_subqueries as $key => $child) {
+         $primitive = $child->rewrite($index);
+
+         if ($this->_signs === null) {
+             $childSign = true;
+         } else {
+             $childSign = $this->_signs[$key];
+         }
+
+         $rewritten->addSubquery($primitive, $childSign);
+     }
+
+     return $rewritten;
+ }
+
+ /**
+ * Optimize query in the context of specified index
+ *
+ * Steps, in order:
+ * 1. Optimize every subquery; a null signs list means "all required".
+ * 2. If a required subquery optimized to an Empty query, return an Empty query;
+ * optional and prohibited Empty subqueries are dropped.
+ * 3. If every remaining subquery is prohibited (or none remain), return an Empty query.
+ * 4. If exactly one subquery remains, return it; when this query's boost is not 1
+ * a clone with the two boosts multiplied is returned instead.
+ * 5. Otherwise try to fold Term and MultiTerm subqueries into plain term lists.
+ * When nothing could be folded, a Boolean query of the optimized subqueries is returned.
+ * When everything was folded and all boosts are equal, a single MultiTerm query
+ * is returned. Otherwise the folded terms are re-attached as Term/MultiTerm
+ * clauses of a new Boolean query.
+ * If the folded non-prohibited terms cannot be regrouped (several terms with
+ * different boosts), the Boolean query prepared before folding is returned.
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ public function optimize(Zend_Search_Lucene_Interface $index)
+ {
+ $subqueries = array();
+ $signs = array();
+
+ // Optimize all subqueries
+ foreach ($this->_subqueries as $id => $subquery) {
+ $subqueries[] = $subquery->optimize($index);
+ $signs[] = ($this->_signs === null)? true : $this->_signs[$id];
+ }
+
+ // Check for empty subqueries
+ foreach ($subqueries as $id => $subquery) {
+ if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
+ if ($signs[$id] === true) {
+ // Matching is required, but is actually empty
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ } else {
+ // Matching is optional or prohibited, but is empty
+ // Remove it from subqueries and signs list
+ unset($subqueries[$id]);
+ unset($signs[$id]);
+ }
+ }
+ }
+
+
+ // Check if all non-empty subqueries are prohibited
+ $allProhibited = true;
+ foreach ($signs as $sign) {
+ if ($sign !== false) {
+ $allProhibited = false;
+ break;
+ }
+ }
+ if ($allProhibited) {
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ }
+
+
+ // Check, if reduced subqueries list has only one entry
+ if (count($subqueries) == 1) {
+ // It's a query with only one required or optional clause
+ // (it's already checked, that it's not a prohibited clause)
+
+ if ($this->getBoost() == 1) {
+ return reset($subqueries);
+ }
+
+ $optimizedQuery = clone reset($subqueries);
+ $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
+
+ return $optimizedQuery;
+ }
+
+
+ // Check, if reduced subqueries list is empty
+ if (count($subqueries) == 0) {
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ }
+
+
+ // Prepare first candidate for optimized query
+ $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
+ $optimizedQuery->setBoost($this->getBoost());
+
+
+ $terms = array();
+ $tsigns = array();
+ $boostFactors = array();
+
+ // Try to decompose term and multi-term subqueries
+ foreach ($subqueries as $id => $subquery) {
+ if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
+ $terms[] = $subquery->getTerm();
+ $tsigns[] = $signs[$id];
+ $boostFactors[] = $subquery->getBoost();
+
+ // remove subquery from a subqueries list
+ unset($subqueries[$id]);
+ unset($signs[$id]);
+ } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
+ $subTerms = $subquery->getTerms();
+ $subSigns = $subquery->getSigns();
+
+ if ($signs[$id] === true) {
+ // It's a required multi-term subquery.
+ // Something like '... +(+term1 -term2 term3 ...) ...'
+
+ // Multi-term required subquery can be decomposed only if it contains
+ // required terms and doesn't contain prohibited terms:
+ // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
+ //
+ // Check this
+ $hasRequired = false;
+ $hasProhibited = false;
+ if ($subSigns === null) {
+ // All subterms are required
+ $hasRequired = true;
+ } else {
+ foreach ($subSigns as $sign) {
+ if ($sign === true) {
+ $hasRequired = true;
+ } else if ($sign === false) {
+ $hasProhibited = true;
+ break;
+ }
+ }
+ }
+ // Continue if subquery has prohibited terms or doesn't have required terms
+ if ($hasProhibited || !$hasRequired) {
+ continue;
+ }
+
+ foreach ($subTerms as $termId => $term) {
+ $terms[] = $term;
+ $tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
+ $boostFactors[] = $subquery->getBoost();
+ }
+
+ // remove subquery from a subqueries list
+ unset($subqueries[$id]);
+ unset($signs[$id]);
+
+ } else { // $signs[$id] === null || $signs[$id] === false
+ // It's an optional or prohibited multi-term subquery.
+ // Something like '... (+term1 -term2 term3 ...) ...'
+ // or
+ // something like '... -(+term1 -term2 term3 ...) ...'
+
+ // Multi-term optional and required subqueries can be decomposed
+ // only if all terms are optional.
+ //
+ // Check if all terms are optional.
+ $onlyOptional = true;
+ if ($subSigns === null) {
+ // All subterms are required
+ $onlyOptional = false;
+ } else {
+ foreach ($subSigns as $sign) {
+ if ($sign !== null) {
+ $onlyOptional = false;
+ break;
+ }
+ }
+ }
+
+ // Continue if non-optional terms are presented in this multi-term subquery
+ if (!$onlyOptional) {
+ continue;
+ }
+
+ foreach ($subTerms as $termId => $term) {
+ $terms[] = $term;
+ $tsigns[] = ($signs[$id] === null)? null /* optional */ :
+ false /* prohibited */;
+ $boostFactors[] = $subquery->getBoost();
+ }
+
+ // remove subquery from a subqueries list
+ unset($subqueries[$id]);
+ unset($signs[$id]);
+ }
+ }
+ }
+
+
+ // Check, if there are no decomposed subqueries
+ if (count($terms) == 0 ) {
+ // return prepared candidate
+ return $optimizedQuery;
+ }
+
+
+ // Check, if all subqueries have been decomposed and all terms has the same boost factor
+ if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
+ $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
+ $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
+
+ return $optimizedQuery;
+ }
+
+
+ // This boolean query can't be transformed to Term/MultiTerm query and still contains
+ // several subqueries
+
+ // Separate prohibited terms
+ $prohibitedTerms = array();
+ foreach ($terms as $id => $term) {
+ if ($tsigns[$id] === false) {
+ $prohibitedTerms[] = $term;
+
+ unset($terms[$id]);
+ unset($tsigns[$id]);
+ unset($boostFactors[$id]);
+ }
+ }
+
+ if (count($terms) == 1) {
+ $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
+ $clause->setBoost(reset($boostFactors));
+
+ $subqueries[] = $clause;
+ $signs[] = reset($tsigns);
+
+ // Clear terms list
+ $terms = array();
+ } else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
+ $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
+ $clause->setBoost(reset($boostFactors));
+
+ $subqueries[] = $clause;
+ // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
+ $signs[] = (in_array(true, $tsigns))? true : null;
+
+ // Clear terms list
+ $terms = array();
+ }
+
+ if (count($prohibitedTerms) == 1) {
+ // (boost factors are not significant for prohibited clauses)
+ $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
+ $signs[] = false;
+
+ // Clear prohibited terms list
+ $prohibitedTerms = array();
+ } else if (count($prohibitedTerms) > 1) {
+ // prepare signs array
+ $prohibitedSigns = array();
+ foreach ($prohibitedTerms as $id => $term) {
+ // all prohibited term are grouped as optional into multi-term query
+ $prohibitedSigns[$id] = null;
+ }
+
+ // (boost factors are not significant for prohibited clauses)
+ $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
+ // Clause sign is 'prohibited'
+ $signs[] = false;
+
+ // Clear terms list
+ $prohibitedTerms = array();
+ }
+
+ /** @todo Group terms with the same boost factors together */
+
+ // Check, that all terms are processed
+ // Replace candidate for optimized query
+ if (count($terms) == 0 && count($prohibitedTerms) == 0) {
+ $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
+ $optimizedQuery->setBoost($this->getBoost());
+ }
+
+ return $optimizedQuery;
+ }
+
+ /**
+ * Returns subqueries
+ *
+ * @return array Array of subquery objects, in the order they are stored
+ */
+ public function getSubqueries()
+ {
+ return $this->_subqueries;
+ }
+
+
+ /**
+ * Return subqueries signs
+ *
+ * Each sign is TRUE (required), FALSE (prohibited) or NULL (optional).
+ * NULL is returned instead of an array when all subqueries are required.
+ *
+ * @return array|null
+ */
+ public function getSigns()
+ {
+ return $this->_signs;
+ }
+
+
+ /**
+  * Constructs an appropriate Weight implementation for this query.
+  *
+  * The new Zend_Search_Lucene_Search_Weight_Boolean is stored as this
+  * query's weight and returned.
+  *
+  * @param Zend_Search_Lucene_Interface $reader
+  * @return Zend_Search_Lucene_Search_Weight
+  */
+ public function createWeight(Zend_Search_Lucene_Interface $reader)
+ {
+     return $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
+ }
+
+
+ /**
+  * Calculate result vector for Conjunction query
+  * (like '<subquery1> AND <subquery2> AND <subquery3>')
+  *
+  * The result is the key intersection of the matchedDocs() arrays of all
+  * subqueries (an empty array when there are no subqueries), sorted by
+  * document id. Processing stops as soon as the intersection is empty.
+  */
+ private function _calculateConjunctionResult()
+ {
+     $this->_resVector = (count($this->_subqueries) == 0) ? array() : null;
+
+     foreach ($this->_subqueries as $clause) {
+         $candidates = $clause->matchedDocs();
+
+         $this->_resVector = ($this->_resVector === null)
+                           ? $candidates
+                           : array_intersect_key($this->_resVector, $candidates);
+
+         if (count($this->_resVector) == 0) {
+             // Nothing can match any more, skip the remaining subqueries
+             break;
+         }
+     }
+
+     ksort($this->_resVector, SORT_NUMERIC);
+ }
+
+
+ /**
+  * Calculate result vector for non Conjunction query
+  * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
+  *
+  * Required subqueries are intersected by document id, optional subqueries
+  * are united. Prohibited subqueries are not applied here because
+  * matchedDocs() may include non-matching ids. The result is the
+  * intersection of the required sets when at least one subquery is required,
+  * the union of the optional sets otherwise, sorted by document id.
+  */
+ private function _calculateNonConjunctionResult()
+ {
+     $requiredDocs = null;
+     $optionalDocs = array();
+
+     foreach ($this->_subqueries as $key => $clause) {
+         $docs = $clause->matchedDocs();
+         $sign = $this->_signs[$key];
+
+         if ($sign === false) {
+             // prohibited: nothing to collect
+             continue;
+         }
+
+         if ($sign === true) {
+             // required: intersect with what is required so far
+             $requiredDocs = ($requiredDocs === null)
+                           ? $docs
+                           : array_intersect_key($requiredDocs, $docs);
+         } else {
+             // optional: union
+             $optionalDocs += $docs;
+         }
+     }
+
+     $this->_resVector = ($requiredDocs !== null) ? $requiredDocs : $optionalDocs;
+
+     ksort($this->_resVector, SORT_NUMERIC);
+ }
+
+
+ /**
+  * Score calculator for conjunction queries (all subqueries are required)
+  *
+  * Returns 0 as soon as one subquery scores 0 for the document. Otherwise
+  * the subquery scores, each multiplied by the coord factor, are summed and
+  * the sum is multiplied by the coord factor and by this query's boost.
+  * The coord factor is computed once and cached.
+  *
+  * @param integer $docId
+  * @param Zend_Search_Lucene_Interface $reader
+  * @return float
+  */
+ public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
+ {
+     if ($this->_coord === null) {
+         $subqueryCount = count($this->_subqueries);
+         $this->_coord  = $reader->getSimilarity()->coord($subqueryCount, $subqueryCount);
+     }
+
+     $score = 0;
+
+     foreach ($this->_subqueries as $subquery) {
+         $subscore = $subquery->score($docId, $reader);
+
+         if ($subscore == 0) {
+             return 0;
+         }
+
+         // Reuse the score computed above instead of scoring the subquery a
+         // second time (the repeated call doubled the work at every nesting level).
+         $score += $subscore * $this->_coord;
+     }
+
+     // NOTE(review): the coord factor is applied to every summand and again to
+     // the sum, while _nonConjunctionScore() applies it once. Kept as is to
+     // preserve existing scores - confirm whether this is intended.
+     return $score * $this->_coord * $this->getBoost();
+ }
+
+
+ /**
+  * Score calculator for non conjunction queries (not all subqueries are required)
+  *
+  * Returns 0 if a prohibited subquery matches the document or a required
+  * one does not. Otherwise the scores of the matching subqueries are summed
+  * and multiplied by the coord factor for the number of matching subqueries
+  * and by this query's boost. Coord factors are computed once, for every
+  * possible number of matching non-prohibited subqueries, and cached.
+  *
+  * @param integer $docId
+  * @param Zend_Search_Lucene_Interface $reader
+  * @return float
+  */
+ public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
+ {
+     if ($this->_coord === null) {
+         $this->_coord = array();
+
+         // Number of subqueries that are allowed to match
+         $maxCoord = 0;
+         foreach ($this->_signs as $sign) {
+             if ($sign !== false) {
+                 $maxCoord++;
+             }
+         }
+
+         for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
+             $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
+         }
+     }
+
+     $total   = 0;
+     $matched = 0;
+
+     foreach ($this->_subqueries as $key => $clause) {
+         $subscore  = $clause->score($docId, $reader);
+         $isMatch   = ($subscore != 0);
+         $clauseSign = $this->_signs[$key];
+
+         // A matching prohibited clause or a missing required clause rejects the document
+         if (($clauseSign === false && $isMatch) || ($clauseSign === true && !$isMatch)) {
+             return 0;
+         }
+
+         if ($isMatch) {
+             $matched++;
+             $total += $subscore;
+         }
+     }
+
+     return $total * $this->_coord[$matched] * $this->getBoost();
+ }
+
+ /**
+  * Execute query in context of index reader
+  * It also initializes necessary internal structures
+  *
+  * The weight is initialized first, then every subquery is executed and
+  * finally the result vector is calculated: as a conjunction when the signs
+  * list is null, as a non-conjunction otherwise.
+  *
+  * @param Zend_Search_Lucene_Interface $reader
+  */
+ public function execute(Zend_Search_Lucene_Interface $reader)
+ {
+     // Initialize weight if it's not done yet
+     $this->_initWeight($reader);
+
+     foreach ($this->_subqueries as $clause) {
+         $clause->execute($reader);
+     }
+
+     if ($this->_signs !== null) {
+         $this->_calculateNonConjunctionResult();
+         return;
+     }
+
+     $this->_calculateConjunctionResult();
+ }
+
+
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations)
+ *
+ * Returns the result vector as it was left by the last execute() call;
+ * it is null until the result vector has been calculated.
+ *
+ * @return array
+ */
+ public function matchedDocs()
+ {
+ return $this->_resVector;
+ }
+
+ /**
+  * Score specified document
+  *
+  * Documents that are not in the result vector score 0. For the others the
+  * conjunction score is used when the signs list is null, the
+  * non-conjunction score otherwise.
+  *
+  * @param integer $docId
+  * @param Zend_Search_Lucene_Interface $reader
+  * @return float
+  */
+ public function score($docId, Zend_Search_Lucene_Interface $reader)
+ {
+     if (!isset($this->_resVector[$docId])) {
+         return 0;
+     }
+
+     return ($this->_signs === null)
+          ? $this->_conjunctionScore($docId, $reader)
+          : $this->_nonConjunctionScore($docId, $reader);
+ }
+
+ /**
+  * Return query terms
+  *
+  * Collects the terms of all subqueries except the prohibited ones.
+  *
+  * @return array
+  */
+ public function getQueryTerms()
+ {
+     $collected = array();
+
+     foreach ($this->_subqueries as $key => $clause) {
+         if ($this->_signs !== null && $this->_signs[$key] === false) {
+             // prohibited subquery
+             continue;
+         }
+
+         $collected = array_merge($collected, $clause->getQueryTerms());
+     }
+
+     return $collected;
+ }
+
+ /**
+  * Highlight query terms
+  *
+  * Delegates to every subquery except the prohibited ones, passing the
+  * same color index along by reference.
+  *
+  * @param Zend_Search_Lucene_Document_Html $doc
+  * @param integer &$colorIndex
+  */
+ public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+ {
+     foreach ($this->_subqueries as $key => $clause) {
+         if ($this->_signs !== null && $this->_signs[$key] === false) {
+             // prohibited subquery
+             continue;
+         }
+
+         $clause->highlightMatchesDOM($doc, $colorIndex);
+     }
+ }
+
+ /**
+  * Print a query
+  *
+  * Every subquery is printed as '(<subquery>)', prefixed with '+' when it
+  * is required (or the signs list is null) and with '-' when it is
+  * prohibited, and followed by '^<boost>' when its boost is not 1.
+  * The parts are separated by single spaces.
+  *
+  * @return string
+  */
+ public function __toString()
+ {
+     // It's used only for query visualisation, so we don't care about characters escaping
+
+     $parts = array();
+
+     foreach ($this->_subqueries as $id => $subquery) {
+         $part = '';
+
+         if ($this->_signs === null || $this->_signs[$id] === true) {
+             $part .= '+';
+         } else if ($this->_signs[$id] === false) {
+             $part .= '-';
+         }
+
+         $part .= '(' . $subquery->__toString() . ')';
+
+         if ($subquery->getBoost() != 1) {
+             $part .= '^' . $subquery->getBoost();
+         }
+
+         $parts[] = $part;
+     }
+
+     // Separate by position, not by array key: the subqueries array may have
+     // been built with non-sequential keys, in which case testing the key
+     // against 0 produced a leading space.
+     return implode(' ', $parts);
+ }
+}
+
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_Empty */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Empty.php';
+
+
+/**
+ * Query that matches no documents.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Empty extends Zend_Search_Lucene_Search_Query
+{
+ /**
+ * Re-write query into primitive queries in the context of specified index
+ *
+ * An "Empty" query is returned unchanged.
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ public function rewrite(Zend_Search_Lucene_Interface $index)
+ {
+ return $this;
+ }
+
+ /**
+ * Optimize query in the context of specified index
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ public function optimize(Zend_Search_Lucene_Interface $index)
+ {
+ // "Empty" query is a primitive query and doesn't need to be optimized
+ return $this;
+ }
+
+ /**
+ * Constructs an appropriate Weight implementation for this query.
+ *
+ * The weight is also stored as this query's weight, so that code which
+ * reads the weight after calling createWeight() finds it.
+ *
+ * @param Zend_Search_Lucene_Interface $reader
+ * @return Zend_Search_Lucene_Search_Weight
+ */
+ public function createWeight(Zend_Search_Lucene_Interface $reader)
+ {
+ $this->_weight = new Zend_Search_Lucene_Search_Weight_Empty();
+ return $this->_weight;
+ }
+
+ /**
+ * Execute query in context of index reader
+ * It also initializes necessary internal structures
+ *
+ * @param Zend_Search_Lucene_Interface $reader
+ */
+ public function execute(Zend_Search_Lucene_Interface $reader)
+ {
+ // Do nothing
+ }
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations)
+ *
+ * @return array Always an empty array
+ */
+ public function matchedDocs()
+ {
+ return array();
+ }
+
+ /**
+ * Score specified document
+ *
+ * @param integer $docId
+ * @param Zend_Search_Lucene_Interface $reader
+ * @return float Always 0
+ */
+ public function score($docId, Zend_Search_Lucene_Interface $reader)
+ {
+ return 0;
+ }
+
+ /**
+ * Return query terms
+ *
+ * @return array Always an empty array
+ */
+ public function getQueryTerms()
+ {
+ return array();
+ }
+
+ /**
+ * Highlight query terms
+ *
+ * @param Zend_Search_Lucene_Document_Html $doc
+ * @param integer &$colorIndex
+ */
+ public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+ {
+ // Do nothing
+ }
+
+ /**
+ * Print a query
+ *
+ * @return string
+ */
+ public function __toString()
+ {
+ return '<EmptyQuery>';
+ }
+}
+
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_MultiTerm */
-require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
*
* @var array
*/
-
- private $_signs = array();
+ private $_signs;
/**
* Result vector.
- * Bitset or array of document IDs
- * (depending from Bitset extension availability).
*
- * @var mixed
+ * @var array
*/
private $_resVector = null;
/**
* Terms positions vectors.
* Array of Arrays:
- * term1Id => (docId => array( pos1, pos2, ... ), ...)
- * term2Id => (docId => array( pos1, pos2, ... ), ...)
+ * term1Id => (docId => freq, ...)
+ * term2Id => (docId => freq, ...)
*
* @var array
*/
- private $_termsPositions = array();
+ private $_termsFreqs = array();
/**
/**
* Class constructor. Create a new multi-term query object.
*
+ * if $signs array is omitted then all terms are required
+ * it differs from addTerm() behavior, but should never be used
+ *
* @param array $terms Array of Zend_Search_Lucene_Index_Term objects
* @param array $signs Array of signs. Sign is boolean|null.
* @return void
*/
public function __construct($terms = null, $signs = null)
{
- /**
- * @todo Check contents of $terms and $signs before adding them.
- */
if (is_array($terms)) {
$this->_terms = $terms;
foreach ($signs as $sign ) {
if ($sign !== true) {
$this->_signs = $signs;
- continue;
+ break;
}
}
}
* @param boolean|null $sign
* @return void
*/
- public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign=null) {
+ public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
+ // $_signs stays null for as long as every added term has sign === true;
+ // the signs array is only built once the first non-true sign arrives.
+ if ($sign !== true || $this->_signs !== null) { // Skip, if all terms are required
+ if ($this->_signs === null) { // Check, If all previous terms are required
+ // Back-fill one `true` entry for each term added so far
+ foreach ($this->_terms as $prevTerm) {
+ $this->_signs[] = true;
+ }
+ }
+ $this->_signs[] = $sign;
+ }
+
$this->_terms[] = $term;
+ }
- /**
- * @todo This is not good. Sometimes $this->_signs is an array, sometimes
- * it is null, even when there are terms. It will be changed so that
- * it is always an array.
- */
- if ($this->_signs === null) {
- if ($sign !== null) {
- $this->_signs = array();
- foreach ($this->_terms as $term) {
- $this->_signs[] = null;
+
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * - no terms: an empty query is returned;
+     * - every term has a field: the query itself is returned;
+     * - otherwise: a boolean query with the same boost is returned, holding
+     *   one rewritten term subquery per term. Each subquery gets the sign of
+     *   its term (true when no signs array exists).
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        if (count($this->_terms) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        // Look for the first term that carries no field
+        $hasUnqualifiedTerm = false;
+        foreach ($this->_terms as $candidate) {
+            if ($candidate->field === null) {
+                $hasUnqualifiedTerm = true;
+                break;
+            }
+        }
+
+        if (!$hasUnqualifiedTerm) {
+            return $this;
+        }
+
+        // Transform the multi-term query into a boolean query and apply
+        // rewrite() to each term subquery
+        $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
+        $booleanQuery->setBoost($this->getBoost());
+
+        foreach ($this->_terms as $termId => $term) {
+            $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
+            $rewritten = $termQuery->rewrite($index);
+            $sign      = ($this->_signs === null) ? true : $this->_signs[$termId];
+
+            $booleanQuery->addSubquery($rewritten, $sign);
+        }
+
+        return $booleanQuery;
+    }
+
+ /**
+ * Optimize query in the context of specified index
+ *
+ * @param Zend_Search_Lucene_Interface $index
+ * @return Zend_Search_Lucene_Search_Query
+ */
+ public function optimize(Zend_Search_Lucene_Interface $index)
+ {
+ $terms = $this->_terms;
+ $signs = $this->_signs;
+
+ foreach ($terms as $id => $term) {
+ if (!$index->hasTerm($term)) {
+ if ($signs === null || $signs[$id] === true) {
+ // Term is required
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ } else {
+ // Term is optional or prohibited
+ // Remove it from terms and signs list
+ unset($terms[$id]);
+ unset($signs[$id]);
}
- $this->_signs[] = $sign;
}
+ }
+
+ // Check if all presented terms are prohibited
+ $allProhibited = true;
+ if ($signs === null) {
+ $allProhibited = false;
} else {
- $this->_signs[] = $sign;
+ foreach ($signs as $sign) {
+ if ($sign !== false) {
+ $allProhibited = false;
+ break;
+ }
+ }
+ }
+ if ($allProhibited) {
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ }
+
+ /**
+ * @todo make an optimization for repeated terms
+ * (they may have different signs)
+ */
+
+ if (count($terms) == 1) {
+ // It's already checked, that it's not a prohibited term
+
+ // It's one term query with one required or optional element
+ $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
+ $optimizedQuery->setBoost($this->getBoost());
+
+ return $optimizedQuery;
}
+
+ if (count($terms) == 0) {
+ return new Zend_Search_Lucene_Search_Query_Empty();
+ }
+
+ $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
+ $optimizedQuery->setBoost($this->getBoost());
+ return $optimizedQuery;
}
/**
* Constructs an appropriate Weight implementation for this query.
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
- protected function _createWeight($reader)
+ public function createWeight(Zend_Search_Lucene_Interface $reader)
{
- return new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
+ $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
+ return $this->_weight;
}
* Calculate result vector for Conjunction query
* (like '+something +another')
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
*/
- private function _calculateConjunctionResult($reader)
+ private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
- if (extension_loaded('bitset')) {
- foreach( $this->_terms as $termId=>$term ) {
- if($this->_resVector === null) {
- $this->_resVector = bitset_from_array($reader->termDocs($term));
- } else {
- $this->_resVector = bitset_intersection(
- $this->_resVector,
- bitset_from_array($reader->termDocs($term)) );
- }
+ $this->_resVector = null;
- $this->_termsPositions[$termId] = $reader->termPositions($term);
+ if (count($this->_terms) == 0) {
+ $this->_resVector = array();
+ }
+
+ foreach( $this->_terms as $termId=>$term ) {
+ if($this->_resVector === null) {
+ $this->_resVector = array_flip($reader->termDocs($term));
+ } else {
+ $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
}
- } else {
- foreach( $this->_terms as $termId=>$term ) {
- if($this->_resVector === null) {
- $this->_resVector = array_flip($reader->termDocs($term));
- } else {
- $termDocs = array_flip($reader->termDocs($term));
- foreach($this->_resVector as $key=>$value) {
- if (!isset( $termDocs[$key] )) {
- unset( $this->_resVector[$key] );
- }
- }
- }
- $this->_termsPositions[$termId] = $reader->termPositions($term);
+ if (count($this->_resVector) == 0) {
+ // Empty result set, we don't need to check other terms
+ break;
}
+
+ $this->_termsFreqs[$termId] = $reader->termFreqs($term);
}
+
+ ksort($this->_resVector, SORT_NUMERIC);
}
* Calculate result vector for non Conjunction query
* (like '+something -another')
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
*/
- private function _calculateNonConjunctionResult($reader)
+ private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
- if (extension_loaded('bitset')) {
- $required = null;
- $neither = bitset_empty();
- $prohibited = bitset_empty();
-
- foreach ($this->_terms as $termId => $term) {
- $termDocs = bitset_from_array($reader->termDocs($term));
-
- if ($this->_signs[$termId] === true) {
- // required
- if ($required !== null) {
- $required = bitset_intersection($required, $termDocs);
- } else {
- $required = $termDocs;
- }
- } elseif ($this->_signs[$termId] === false) {
- // prohibited
- $prohibited = bitset_union($prohibited, $termDocs);
+ $required = null;
+ $optional = array();
+ $prohibited = array();
+
+ foreach ($this->_terms as $termId => $term) {
+ $termDocs = array_flip($reader->termDocs($term));
+
+ if ($this->_signs[$termId] === true) {
+ // required
+ if ($required !== null) {
+ // array intersection
+ $required = array_intersect_key($required, $termDocs);
} else {
- // neither required, nor prohibited
- $neither = bitset_union($neither, $termDocs);
+ $required = $termDocs;
}
-
- $this->_termsPositions[$termId] = $reader->termPositions($term);
- }
-
- if ($required === null) {
- $required = $neither;
- }
- $this->_resVector = bitset_intersection( $required,
- bitset_invert($prohibited, $reader->count()) );
- } else {
- $required = null;
- $neither = array();
- $prohibited = array();
-
- foreach ($this->_terms as $termId => $term) {
- $termDocs = array_flip($reader->termDocs($term));
-
- if ($this->_signs[$termId] === true) {
- // required
- if ($required !== null) {
- // substitute for bitset_intersection
- foreach ($required as $key => $value) {
- if (!isset( $termDocs[$key] )) {
- unset($required[$key]);
- }
- }
- } else {
- $required = $termDocs;
- }
- } elseif ($this->_signs[$termId] === false) {
- // prohibited
- // substitute for bitset_union
- foreach ($termDocs as $key => $value) {
- $prohibited[$key] = $value;
- }
- } else {
- // neither required, nor prohibited
- // substitute for bitset_union
- foreach ($termDocs as $key => $value) {
- $neither[$key] = $value;
- }
- }
-
- $this->_termsPositions[$termId] = $reader->termPositions($term);
+ } elseif ($this->_signs[$termId] === false) {
+ // prohibited
+ // array union
+ $prohibited += $termDocs;
+ } else {
+ // neither required, nor prohibited
+ // array union
+ $optional += $termDocs;
}
- if ($required === null) {
- $required = $neither;
- }
+ $this->_termsFreqs[$termId] = $reader->termFreqs($term);
+ }
- foreach ($required as $key=>$value) {
- if (isset( $prohibited[$key] )) {
- unset($required[$key]);
- }
- }
- $this->_resVector = $required;
+ if ($required !== null) {
+ $this->_resVector = (count($prohibited) > 0) ?
+ array_diff_key($required, $prohibited) :
+ $required;
+ } else {
+ $this->_resVector = (count($prohibited) > 0) ?
+ array_diff_key($optional, $prohibited) :
+ $optional;
}
+
+ ksort($this->_resVector, SORT_NUMERIC);
}
* Score calculator for conjunction queries (all terms are required)
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
- public function _conjunctionScore($docId, $reader)
+ public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
if ($this->_coord === null) {
$this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
$score = 0.0;
foreach ($this->_terms as $termId=>$term) {
- $score += $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
+ /**
+ * We don't need to check that term freq is not 0
+ * Score calculation is performed only for matched docs
+ */
+ $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
$this->_weights[$termId]->getValue() *
$reader->norm($docId, $term->field);
}
- return $score * $this->_coord;
+ return $score * $this->_coord * $this->getBoost();
}
* Score calculator for non conjunction queries (not all terms are required)
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function _nonConjunctionScore($docId, $reader)
$matchedTerms = 0;
foreach ($this->_terms as $termId=>$term) {
// Check if term is
- if ($this->_signs[$termId] !== false && // not prohibited
- isset($this->_termsPositions[$termId][$docId]) // matched
+ if ($this->_signs[$termId] !== false && // not prohibited
+ isset($this->_termsFreqs[$termId][$docId]) // matched
) {
$matchedTerms++;
+
+ /**
+ * We don't need to check that term freq is not 0
+ * Score calculation is performed only for matched docs
+ */
$score +=
- $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
+ $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
$this->_weights[$termId]->getValue() *
$reader->norm($docId, $term->field);
}
}
- return $score * $this->_coord[$matchedTerms];
+ return $score * $this->_coord[$matchedTerms] * $this->getBoost();
+ }
+
+    /**
+     * Execute query in context of index reader
+     * It also initializes necessary internal structures
+     *
+     * When a signs array exists the non-conjunction calculation is used,
+     * otherwise (all terms required) the conjunction calculation is used.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    public function execute(Zend_Search_Lucene_Interface $reader)
+    {
+        if ($this->_signs !== null) {
+            $this->_calculateNonConjunctionResult($reader);
+        } else {
+            $this->_calculateConjunctionResult($reader);
+        }
+
+        // Initialize weight if it's not done yet
+        $this->_initWeight($reader);
+    }
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations).
+ * Returns the stored result vector as is; that property is initialized
+ * to null and is filled in by the result calculation run from execute().
+ *
+ * @return array
+ */
+ public function matchedDocs()
+ {
+ return $this->_resVector;
}
/**
* Score specified document
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
- public function score($docId, $reader)
+ public function score($docId, Zend_Search_Lucene_Interface $reader)
{
- if($this->_resVector === null) {
+ if (isset($this->_resVector[$docId])) {
if ($this->_signs === null) {
- $this->_calculateConjunctionResult($reader);
+ return $this->_conjunctionScore($docId, $reader);
} else {
- $this->_calculateNonConjunctionResult($reader);
+ return $this->_nonConjunctionScore($docId, $reader);
}
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+ * Return query terms
+ *
+ * @return array
+ */
+ public function getQueryTerms()
+ {
+ if ($this->_signs === null) {
+ return $this->_terms;
+ }
+
+ $terms = array();
- $this->_initWeight($reader);
+ foreach ($this->_signs as $id => $sign) {
+ if ($sign !== false) {
+ $terms[] = $this->_terms[$id];
+ }
}
- if ( (extension_loaded('bitset')) ?
- bitset_in($this->_resVector, $docId) :
- isset($this->_resVector[$docId]) ) {
- if ($this->_signs === null) {
- return $this->_conjunctionScore($docId, $reader);
- } else {
- return $this->_nonConjunctionScore($docId, $reader);
+ return $terms;
+ }
+
+ /**
+ * Highlight query terms
+ *
+ * @param integer &$colorIndex
+ * @param Zend_Search_Lucene_Document_Html $doc
+ */
+ public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+ {
+ $words = array();
+
+ if ($this->_signs === null) {
+ foreach ($this->_terms as $term) {
+ $words[] = $term->text;
}
} else {
- return 0;
+ foreach ($this->_signs as $id => $sign) {
+ if ($sign !== false) {
+ $words[] = $this->_terms[$id]->text;
+ }
+ }
}
+
+ $doc->highlight($words, $this->_getHighlightColor($colorIndex));
+ }
+
+    /**
+     * Print a query
+     *
+     * Each term is rendered as [sign][field:]text, where the sign is '+' for
+     * a required term (or when no signs array exists), '-' for a prohibited
+     * term and empty otherwise. Terms are separated by single spaces. A boost
+     * other than 1 wraps the result as "(...)^boost".
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+
+        $parts = array();
+
+        foreach ($this->_terms as $id => $term) {
+            $part = '';
+
+            if ($this->_signs === null || $this->_signs[$id] === true) {
+                $part .= '+';
+            } else if ($this->_signs[$id] === false) {
+                $part .= '-';
+            }
+
+            if ($term->field !== null) {
+                $part .= $term->field . ':';
+            }
+
+            $parts[] = $part . $term->text;
+        }
+
+        // Separate by position, not by key value: term keys need not start
+        // at 0 (optimize() unsets entries and passes the remaining terms to
+        // a new query), and testing "$id != 0" then emitted a leading space.
+        $query = implode(' ', $parts);
+
+        if ($this->getBoost() != 1) {
+            $query = '(' . $query . ')^' . $this->getBoost();
+        }
+
+        return $query;
}
}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene_Search_Query
*/
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
/**
* Zend_Search_Lucene_Search_Weight_MultiTerm
*/
-require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Phrase.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
*
* The slop is zero by default, requiring exact matches.
*
- * @var unknown_type
+ * @var integer
*/
private $_slop;
/**
* Result vector.
- * Bitset or array of document IDs
- * (depending from Bitset extension availability).
*
- * @var mixed
+ * @var array
*/
private $_resVector = null;
}
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * - no terms: an empty query is returned;
+     * - first term (key 0) has a field: the query itself is returned;
+     * - otherwise: a boolean query with the same boost is returned, holding
+     *   one phrase subquery per name from $index->getFieldNames(true). Each
+     *   subquery repeats the terms and offsets against that field and keeps
+     *   the same slop.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        if (count($this->_terms) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($this->_terms[0]->field !== null) {
+            return $this;
+        }
+
+        $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
+        $booleanQuery->setBoost($this->getBoost());
+
+        foreach ($index->getFieldNames(true) as $fieldName) {
+            $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
+            $fieldPhrase->setSlop($this->getSlop());
+
+            foreach ($this->_terms as $termId => $term) {
+                $fieldPhrase->addTerm(
+                    new Zend_Search_Lucene_Index_Term($term->text, $fieldName),
+                    $this->_offsets[$termId]
+                );
+            }
+
+            $booleanQuery->addSubquery($fieldPhrase);
+        }
+
+        return $booleanQuery;
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * Returns an empty query when the phrase has no terms or when the index
+     * lacks any of them, a single-term query (same boost) when the phrase
+     * consists of exactly one term, and the phrase itself otherwise.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        // Every phrase term has to be present in the index
+        foreach ($this->_terms as $phraseTerm) {
+            if (!$index->hasTerm($phraseTerm)) {
+                return new Zend_Search_Lucene_Search_Query_Empty();
+            }
+        }
+
+        $termCount = count($this->_terms);
+
+        if ($termCount == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($termCount == 1) {
+            // A one-term phrase is just a term query
+            $termQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
+            $termQuery->setBoost($this->getBoost());
+
+            return $termQuery;
+        }
+
+        return $this;
+    }
+
/**
* Returns query term
*
/**
* Constructs an appropriate Weight implementation for this query.
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
- protected function _createWeight($reader)
+ public function createWeight(Zend_Search_Lucene_Interface $reader)
{
- return new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
- }
-
-
- /**
- * Calculate result vector
- *
- * @param Zend_Search_Lucene $reader
- */
- private function _calculateResult($reader)
- {
- if (extension_loaded('bitset')) {
- foreach( $this->_terms as $termId=>$term ) {
- if($this->_resVector === null) {
- $this->_resVector = bitset_from_array($reader->termDocs($term));
- } else {
- $this->_resVector = bitset_intersection(
- $this->_resVector,
- bitset_from_array($reader->termDocs($term)) );
- }
-
- $this->_termsPositions[$termId] = $reader->termPositions($term);
- }
- } else {
- foreach( $this->_terms as $termId=>$term ) {
- if($this->_resVector === null) {
- $this->_resVector = array_flip($reader->termDocs($term));
- } else {
- $termDocs = array_flip($reader->termDocs($term));
- foreach($this->_resVector as $key=>$value) {
- if (!isset( $termDocs[$key] )) {
- unset( $this->_resVector[$key] );
- }
- }
- }
-
- $this->_termsPositions[$termId] = $reader->termPositions($term);
- }
- }
+ $this->_weight = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
+ return $this->_weight;
}
* Score calculator for sloppy phrase queries (terms sequence is fixed)
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
- public function _sloppyPhraseFreq($docId, Zend_Search_Lucene $reader)
+ public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
{
$freq = 0;
return $freq;
}
-
/**
- * Score specified document
+ * Execute query in context of index reader
+ * It also initializes necessary internal structures
*
- * @param integer $docId
- * @param Zend_Search_Lucene $reader
- * @return float
+ * @param Zend_Search_Lucene_Interface $reader
*/
- public function score($docId, $reader)
+ public function execute(Zend_Search_Lucene_Interface $reader)
{
- // optimize zero-term case
+ $this->_resVector = null;
+
if (count($this->_terms) == 0) {
- return 0;
+ $this->_resVector = array();
}
- if($this->_resVector === null) {
- $this->_calculateResult($reader);
- $this->_initWeight($reader);
+ foreach( $this->_terms as $termId=>$term ) {
+ if($this->_resVector === null) {
+ $this->_resVector = array_flip($reader->termDocs($term));
+ } else {
+ $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
+ }
+
+ if (count($this->_resVector) == 0) {
+ // Empty result set, we don't need to check other terms
+ break;
+ }
+
+ $this->_termsPositions[$termId] = $reader->termPositions($term);
}
- if ( (extension_loaded('bitset')) ?
- bitset_in($this->_resVector, $docId) :
- isset($this->_resVector[$docId]) ) {
+ ksort($this->_resVector, SORT_NUMERIC);
+
+ // Initialize weight if it's not done yet
+ $this->_initWeight($reader);
+ }
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations).
+ * Returns the stored result vector as is; that property is initialized
+ * to null and is filled in by execute().
+ *
+ * @return array
+ */
+ public function matchedDocs()
+ {
+ return $this->_resVector;
+ }
+
+ /**
+ * Score specified document
+ *
+ * @param integer $docId
+ * @param Zend_Search_Lucene_Interface $reader
+ * @return float
+ */
+ public function score($docId, Zend_Search_Lucene_Interface $reader)
+ {
+ if (isset($this->_resVector[$docId])) {
if ($this->_slop == 0) {
$freq = $this->_exactPhraseFreq($docId);
} else {
$freq = $this->_sloppyPhraseFreq($docId, $reader);
}
-/*
- return $reader->getSimilarity()->tf($freq) *
- $this->_weight->getValue() *
- $reader->norm($docId, reset($this->_terms)->field);
-*/
if ($freq != 0) {
$tf = $reader->getSimilarity()->tf($freq);
$weight = $this->_weight->getValue();
$norm = $reader->norm($docId, reset($this->_terms)->field);
- return $tf*$weight*$norm;
+ return $tf * $weight * $norm * $this->getBoost();
}
+
+ // Included in result, but calculated freq is zero
+ return 0;
} else {
return 0;
}
}
+
+ /**
+ * Return query terms
+ *
+ * Returns the phrase's stored terms array unchanged.
+ *
+ * @return array
+ */
+ public function getQueryTerms()
+ {
+ return $this->_terms;
+ }
+
+    /**
+     * Highlight query terms
+     *
+     * Collects the text of every phrase term and passes the list to
+     * $doc->highlight() together with the colour obtained from
+     * _getHighlightColor($colorIndex).
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        $texts = array();
+        foreach ($this->_terms as $phraseTerm) {
+            $texts[] = $phraseTerm->text;
+        }
+
+        $color = $this->_getHighlightColor($colorIndex);
+        $doc->highlight($texts, $color);
+    }
+
+    /**
+     * Print a query
+     *
+     * Output shape: [field:]"text text ..."[~slop]. The field prefix comes
+     * from the term stored under key 0 (when set and not null). The slop
+     * suffix is appended only when the slop is non-zero.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+
+        $fieldPrefix = '';
+        if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
+            $fieldPrefix = $this->_terms[0]->field . ':';
+        }
+
+        $phraseText = '';
+        foreach ($this->_terms as $id => $term) {
+            $phraseText .= (($id != 0) ? ' ' : '') . $term->text;
+        }
+
+        $slopSuffix = ($this->_slop != 0) ? ('~' . $this->_slop) : '';
+
+        return $fieldPrefix . '"' . $phraseText . '"' . $slopSuffix;
+    }
}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_Term */
-require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Term.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
*/
private $_term;
- /**
- * Term sign.
- * If true then term is required
- * If false then term is prohibited.
- *
- * @var bool
- */
- private $_sign;
-
/**
* Documents vector.
- * Bitset or array of document IDs
- * (depending from Bitset extension availability).
*
- * @var mixed
+ * @var array
*/
private $_docVector = null;
/**
- * Term positions vector.
- * Array: docId => array( pos1, pos2, ... )
+ * Term freqs vector.
+ * array(docId => freq, ...)
*
* @var array
*/
- private $_termPositions;
+ private $_termFreqs;
/**
* @param Zend_Search_Lucene_Index_Term $term
* @param boolean $sign
*/
- public function __construct( $term, $sign = true )
+ public function __construct($term)
{
$this->_term = $term;
- $this->_sign = $sign;
+ }
+
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * A term that already has a field is returned as is. Otherwise a
+     * multi-term query with the same boost is built, containing this term's
+     * text once for each name from $index->getFieldNames(true), and the
+     * result of rewriting that query is returned.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        // Loose comparison: a field that compares equal to null (e.g. '')
+        // is expanded just like a null field
+        if ($this->_term->field != null) {
+            return $this;
+        }
+
+        $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
+        $multiTermQuery->setBoost($this->getBoost());
+
+        foreach ($index->getFieldNames(true) as $fieldName) {
+            $multiTermQuery->addTerm(
+                new Zend_Search_Lucene_Index_Term($this->_term->text, $fieldName)
+            );
+        }
+
+        return $multiTermQuery->rewrite($index);
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * Returns the query itself when the index contains its term, and an
+     * empty query otherwise.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        if ($index->hasTerm($this->_term)) {
+            return $this;
+        }
+
+        // The index doesn't contain the specified term
+        return new Zend_Search_Lucene_Search_Query_Empty();
}
/**
* Constructs an appropriate Weight implementation for this query.
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
- protected function _createWeight($reader)
+ public function createWeight(Zend_Search_Lucene_Interface $reader)
+ {
+ $this->_weight = new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
+ return $this->_weight;
+ }
+
+ /**
+ * Execute query in context of index reader
+ * It also initializes necessary internal structures
+ *
+ * @param Zend_Search_Lucene_Interface $reader
+ */
+ public function execute(Zend_Search_Lucene_Interface $reader)
{
- return new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
+ $this->_docVector = array_flip($reader->termDocs($this->_term));
+ $this->_termFreqs = $reader->termFreqs($this->_term);
+
+ // Initialize weight if it's not done yet
+ $this->_initWeight($reader);
+ }
+
+ /**
+ * Get document ids likely matching the query
+ *
+ * It's an array with document ids as keys (performance considerations).
+ * Returns the stored documents vector as is; that property is initialized
+ * to null and is assigned by execute().
+ *
+ * @return array
+ */
+ public function matchedDocs()
+ {
+ return $this->_docVector;
}
/**
* Score specified document
*
* @param integer $docId
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return float
*/
- public function score( $docId, $reader )
+ public function score($docId, Zend_Search_Lucene_Interface $reader)
{
- if($this->_docVector===null) {
- if (extension_loaded('bitset')) {
- $this->_docVector = bitset_from_array( $reader->termDocs($this->_term) );
- } else {
- $this->_docVector = array_flip($reader->termDocs($this->_term));
- }
-
- $this->_termPositions = $reader->termPositions($this->_term);
- $this->_initWeight($reader);
- }
-
- $match = extension_loaded('bitset') ? bitset_in($this->_docVector, $docId) :
- isset($this->_docVector[$docId]);
- if ($this->_sign && $match) {
- return $reader->getSimilarity()->tf(count($this->_termPositions[$docId]) ) *
+ if (isset($this->_docVector[$docId])) {
+ return $reader->getSimilarity()->tf($this->_termFreqs[$docId]) *
$this->_weight->getValue() *
- $reader->norm($docId, $this->_term->field);
+ $reader->norm($docId, $this->_term->field) *
+ $this->getBoost();
} else {
return 0;
}
}
+
+ /**
+ * Return query terms
+ *
+ * The result is a one-element array wrapping this query's single term.
+ *
+ * @return array
+ */
+ public function getQueryTerms()
+ {
+ return array($this->_term);
+ }
+
+ /**
+ * Return query term
+ *
+ * Returns the term object stored by the constructor, not wrapped in an array.
+ *
+ * @return Zend_Search_Lucene_Index_Term
+ */
+ public function getTerm()
+ {
+ return $this->_term;
+ }
+
+    /**
+     * Returns query terms
+     *
+     * This query holds exactly one term, so the result is a one-element
+     * array containing it.
+     *
+     * @return array
+     */
+    public function getTerms()
+    {
+        // The class stores its term in $_term; there is no $_terms property,
+        // so reading $this->_terms yielded null instead of an array.
+        return array($this->_term);
+    }
+
+ /**
+ * Highlight query terms
+ *
+ * Passes the term text to $doc->highlight() together with the colour
+ * obtained from _getHighlightColor($colorIndex).
+ *
+ * @param Zend_Search_Lucene_Document_Html $doc
+ * @param integer &$colorIndex
+ */
+ public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+ {
+ $doc->highlight($this->_term->text, $this->_getHighlightColor($colorIndex));
+ }
+
+    /**
+     * Print a query
+     *
+     * Output shape: [field:]text. The field prefix is omitted when the
+     * term's field is null.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+        $fieldPrefix = '';
+        if ($this->_term->field !== null) {
+            $fieldPrefix = $this->_term->field . ':';
+        }
+
+        return $fieldPrefix . $this->_term->text;
+    }
}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Term.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Phrase */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Phrase.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Subquery */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
+
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+abstract class Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Accumulated boost factor of this entry (starts at 1.0)
+     *
+     * @var float
+     */
+    protected $_boost = 1.0;
+
+
+    /**
+     * Handle the '~' modifier attached to this entry
+     *
+     * @param mixed $parameter
+     */
+    abstract public function processFuzzyProximityModifier($parameter = null);
+
+
+    /**
+     * Build the query object representing this entry
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    abstract public function getQuery($encoding);
+
+    /**
+     * Multiply the entry's current boost by the given factor
+     *
+     * @param float $boostFactor
+     */
+    public function boost($boostFactor)
+    {
+        $this->_boost = $this->_boost * $boostFactor;
+    }
+
+
+}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Phrase text as passed to the constructor
+     *
+     * @var string
+     */
+    private $_phrase;
+
+    /**
+     * Field used for every term created from the phrase
+     *
+     * @var string|null
+     */
+    private $_field;
+
+
+    /**
+     * Set to true once a '~' modifier has been processed
+     *
+     * @var boolean
+     */
+    private $_proximityQuery = false;
+
+    /**
+     * Word distance handed to setSlop() for proximity queries
+     *
+     * @var integer
+     */
+    private $_wordsDistance = 0;
+
+
+    /**
+     * Object constructor
+     *
+     * @param string $phrase
+     * @param string $field
+     */
+    public function __construct($phrase, $field)
+    {
+        $this->_field  = $field;
+        $this->_phrase = $phrase;
+    }
+
+    /**
+     * Process modifier ('~'): turn the phrase into a proximity phrase
+     *
+     * @param mixed $parameter  word distance; null keeps the current distance
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $this->_proximityQuery = true;
+
+        if ($parameter === null) {
+            // No explicit distance given: leave the stored one untouched
+            return;
+        }
+
+        $this->_wordsDistance = $parameter;
+    }
+
+    /**
+     * Transform entry to a subquery
+     *
+     * The phrase is tokenized with the default analyzer. No tokens give an
+     * empty query, one token gives a term query, several tokens give a phrase
+     * query (with slop applied when a '~' modifier was processed).
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function getQuery($encoding)
+    {
+        $hasWildcard = strpos($this->_phrase, '?') !== false
+                    || strpos($this->_phrase, '*') !== false;
+        if ($hasWildcard) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
+        }
+
+        $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
+        $tokens     = $analyzer->tokenize($this->_phrase, $encoding);
+        $tokenCount = count($tokens);
+
+        if ($tokenCount == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($tokenCount == 1) {
+            // A lone token does not need a phrase query
+            $onlyTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
+            $query    = new Zend_Search_Lucene_Search_Query_Term($onlyTerm);
+        } else {
+            $query = new Zend_Search_Lucene_Search_Query_Phrase();
+            foreach ($tokens as $token) {
+                $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field));
+            }
+
+            if ($this->_proximityQuery) {
+                $query->setSlop($this->_wordsDistance);
+            }
+        }
+
+        $query->setBoost($this->_boost);
+
+        return $query;
+    }
+}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Wrapped query, returned by getQuery() after the boost is applied
+     *
+     * @var Zend_Search_Lucene_Search_Query
+     */
+    private $_query;
+
+    /**
+     * Object constructor
+     *
+     * @param Zend_Search_Lucene_Search_Query $query
+     */
+    public function __construct(Zend_Search_Lucene_Search_Query $query)
+    {
+        $this->_query = $query;
+    }
+
+    /**
+     * Process modifier ('~')
+     *
+     * Always rejected for a subquery entry.
+     *
+     * @param mixed $parameter
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $message = '\'~\' sign must follow term or phrase';
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+
+
+    /**
+     * Transform entry to a subquery
+     *
+     * Sets the entry boost on the wrapped query and returns that same object.
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function getQuery($encoding)
+    {
+        $query = $this->_query;
+        $query->setBoost($this->_boost);
+
+        return $query;
+    }
+}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Term text as passed to the constructor
+     *
+     * @var string
+     */
+    private $_term;
+
+    /**
+     * Field used for every index term created from the text
+     *
+     * @var string|null
+     */
+    private $_field;
+
+
+    /**
+     * Set to true once a '~' modifier has been processed
+     *
+     * @var boolean
+     */
+    private $_fuzzyQuery = false;
+
+    /**
+     * Similarity recorded by the '~' modifier (not read anywhere in this class yet)
+     *
+     * @var float
+     */
+    private $_similarity = 1.0;
+
+
+    /**
+     * Object constructor
+     *
+     * @param string $term
+     * @param string $field
+     */
+    public function __construct($term, $field)
+    {
+        $this->_field = $field;
+        $this->_term  = $term;
+    }
+
+    /**
+     * Process modifier ('~'): mark the term as a fuzzy search
+     *
+     * @param mixed $parameter  similarity; 0.5 is stored when null is given
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $this->_fuzzyQuery = true;
+        $this->_similarity = ($parameter !== null) ? $parameter : 0.5;
+    }
+
+    /**
+     * Transform entry to a subquery
+     *
+     * Fuzzy and wildcard terms are rejected. Otherwise the text is tokenized
+     * with the default analyzer: no tokens give an empty query, one token a
+     * term query, several tokens a multi-term query with all terms required.
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function getQuery($encoding)
+    {
+        if ($this->_fuzzyQuery) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported yet.');
+        }
+
+        $hasWildcard = strpos($this->_term, '?') !== false
+                    || strpos($this->_term, '*') !== false;
+        if ($hasWildcard) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard queries are not supported yet.');
+        }
+
+        $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
+        $tokens     = $analyzer->tokenize($this->_term, $encoding);
+        $tokenCount = count($tokens);
+
+        if ($tokenCount == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($tokenCount == 1) {
+            $onlyTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
+            $query    = new Zend_Search_Lucene_Search_Query_Term($onlyTerm);
+        } else {
+            /**
+             * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
+             * analyzer design features
+             */
+            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
+            foreach ($tokens as $token) {
+                // second argument true: every subterm is required
+                $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field), true);
+            }
+        }
+
+        $query->setBoost($this->_boost);
+
+        return $query;
+    }
+}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryHit
{
/**
* Object handle of the index
- * @var Zend_Search_Lucene
+ * @var Zend_Search_Lucene_Interface
*/
protected $_index = null;
/**
- * Constructor - pass object handle of Zend_Search_Lucene index that produced
+ * Constructor - pass object handle of Zend_Search_Lucene_Interface index that produced
* the hit so the document can be retrieved easily from the hit.
*
- * @param Zend_Search_Lucene $index
+ * @param Zend_Search_Lucene_Interface $index
*/
- public function __construct(Zend_Search_Lucene $index)
+ public function __construct(Zend_Search_Lucene_Interface $index)
{
- $this->_index = $index;
+ $this->_index = new Zend_Search_Lucene_Proxy($index);
}
/**
* Return the index object for this hit
*
- * @return Zend_Search_Lucene
+ * @return Zend_Search_Lucene_Interface
*/
public function getIndex()
{
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
+{
+ /** State Machine states */
+ const ST_WHITE_SPACE = 0;
+ const ST_SYNT_LEXEME = 1;
+ const ST_LEXEME = 2;
+ const ST_QUOTED_LEXEME = 3;
+ const ST_ESCAPED_CHAR = 4;
+ const ST_ESCAPED_QCHAR = 5;
+ const ST_LEXEME_MODIFIER = 6;
+ const ST_NUMBER = 7;
+ const ST_MANTISSA = 8;
+ const ST_ERROR = 9;
+
+ /** Input symbols */
+ const IN_WHITE_SPACE = 0;
+ const IN_SYNT_CHAR = 1;
+ const IN_LEXEME_MODIFIER = 2;
+ const IN_ESCAPE_CHAR = 3;
+ const IN_QUOTE = 4;
+ const IN_DECIMAL_POINT = 5;
+ const IN_ASCII_DIGIT = 6;
+ const IN_CHAR = 7;
+ const IN_MUTABLE_CHAR = 8;
+
+ const QUERY_WHITE_SPACE_CHARS = " \n\r\t";
+ const QUERY_SYNT_CHARS = ':()[]{}!|&';
+ const QUERY_MUTABLE_CHARS = '+-';
+ const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
+ const QUERY_LEXEMEMODIFIER_CHARS = '~^';
+ const QUERY_ASCIIDIGITS_CHARS = '0123456789';
+
+ /**
+ * List of recognized lexemes
+ *
+ * @var array
+ */
+ private $_lexemes;
+
+ /**
+ * Query string (array of single- or non single-byte characters)
+ *
+ * @var array
+ */
+ private $_queryString;
+
+ /**
+ * Current position within a query string
+ * Used to create appropriate error messages
+ *
+ * @var integer
+ */
+ private $_queryStringPosition;
+
+ /**
+ * Recognized part of current lexeme
+ *
+ * @var string
+ */
+ private $_currentLexeme;
+
+ /**
+ * Set up the lexer state machine
+ *
+ * Hands the list of states and the list of input symbols to the parent
+ * FSM constructor, registers the transition rules for each state, and
+ * then attaches the entry/transition actions that build the lexeme list.
+ *
+ * Each rule appears to be array(state, input symbol, next state[, action])
+ * judging by the constants used - confirm against
+ * Zend_Search_Lucene_FSM::addRules().
+ */
+ public function __construct()
+ {
+ parent::__construct( array(self::ST_WHITE_SPACE,
+ self::ST_SYNT_LEXEME,
+ self::ST_LEXEME,
+ self::ST_QUOTED_LEXEME,
+ self::ST_ESCAPED_CHAR,
+ self::ST_ESCAPED_QCHAR,
+ self::ST_LEXEME_MODIFIER,
+ self::ST_NUMBER,
+ self::ST_MANTISSA,
+ self::ST_ERROR),
+ array(self::IN_WHITE_SPACE,
+ self::IN_SYNT_CHAR,
+ self::IN_MUTABLE_CHAR,
+ self::IN_LEXEME_MODIFIER,
+ self::IN_ESCAPE_CHAR,
+ self::IN_QUOTE,
+ self::IN_DECIMAL_POINT,
+ self::IN_ASCII_DIGIT,
+ self::IN_CHAR));
+
+
+ // Actions attached to the rules that lead to ST_ERROR; each one throws a parser exception
+ $lexemeModifierErrorAction = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
+ $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
+ $wrongNumberErrorAction = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');
+
+
+
+ // Rules for ST_WHITE_SPACE
+ $this->addRules(array( array(self::ST_WHITE_SPACE, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_WHITE_SPACE, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_WHITE_SPACE, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_WHITE_SPACE, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+ array(self::ST_WHITE_SPACE, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
+ array(self::ST_WHITE_SPACE, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
+ array(self::ST_WHITE_SPACE, self::IN_DECIMAL_POINT, self::ST_LEXEME),
+ array(self::ST_WHITE_SPACE, self::IN_ASCII_DIGIT, self::ST_LEXEME),
+ array(self::ST_WHITE_SPACE, self::IN_CHAR, self::ST_LEXEME)
+ ));
+ // Rules for ST_SYNT_LEXEME (same targets as ST_WHITE_SPACE except for white space itself)
+ $this->addRules(array( array(self::ST_SYNT_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_SYNT_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_SYNT_LEXEME, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_SYNT_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+ array(self::ST_SYNT_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
+ array(self::ST_SYNT_LEXEME, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
+ array(self::ST_SYNT_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
+ array(self::ST_SYNT_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
+ array(self::ST_SYNT_LEXEME, self::IN_CHAR, self::ST_LEXEME)
+ ));
+ // Rules for ST_LEXEME: '+'/'-' (IN_MUTABLE_CHAR) stay inside the lexeme here
+ $this->addRules(array( array(self::ST_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_LEXEME, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
+ array(self::ST_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+ array(self::ST_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
+
+ // IN_QUOTE not allowed
+ array(self::ST_LEXEME, self::IN_QUOTE, self::ST_ERROR, $quoteWithinLexemeErrorAction),
+
+ array(self::ST_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
+ array(self::ST_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
+ array(self::ST_LEXEME, self::IN_CHAR, self::ST_LEXEME)
+ ));
+ // Rules for ST_QUOTED_LEXEME: only the escape char and the closing quote leave the state
+ $this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_QCHAR),
+ array(self::ST_QUOTED_LEXEME, self::IN_QUOTE, self::ST_WHITE_SPACE),
+ array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
+ array(self::ST_QUOTED_LEXEME, self::IN_CHAR, self::ST_QUOTED_LEXEME)
+ ));
+ // Rules for ST_ESCAPED_CHAR: every input leads to ST_LEXEME
+ $this->addRules(array( array(self::ST_ESCAPED_CHAR, self::IN_WHITE_SPACE, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_SYNT_CHAR, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_ESCAPE_CHAR, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_QUOTE, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_DECIMAL_POINT, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_ASCII_DIGIT, self::ST_LEXEME),
+ array(self::ST_ESCAPED_CHAR, self::IN_CHAR, self::ST_LEXEME)
+ ));
+ // Rules for ST_ESCAPED_QCHAR: every input leads back to ST_QUOTED_LEXEME
+ $this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
+ array(self::ST_ESCAPED_QCHAR, self::IN_CHAR, self::ST_QUOTED_LEXEME)
+ ));
+ // Rules for ST_LEXEME_MODIFIER: a digit or decimal point starts the modifier's number
+ $this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+ // IN_ESCAPE_CHAR not allowed
+ array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
+
+ // IN_QUOTE not allowed
+ array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE, self::ST_ERROR, $lexemeModifierErrorAction),
+
+
+ array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
+ array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
+
+ // IN_CHAR not allowed
+ array(self::ST_LEXEME_MODIFIER, self::IN_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
+ ));
+ // Rules for ST_NUMBER
+ $this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_NUMBER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_NUMBER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+ // IN_ESCAPE_CHAR not allowed
+ array(self::ST_NUMBER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
+
+ // IN_QUOTE not allowed
+ array(self::ST_NUMBER, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
+
+ array(self::ST_NUMBER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
+ array(self::ST_NUMBER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
+
+ // IN_CHAR not allowed
+ array(self::ST_NUMBER, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
+ ));
+ // Rules for ST_MANTISSA: like ST_NUMBER, but a second decimal point is an error
+ $this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
+ array(self::ST_MANTISSA, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
+ array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+ // IN_ESCAPE_CHAR not allowed
+ array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
+
+ // IN_QUOTE not allowed
+ array(self::ST_MANTISSA, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
+
+ // IN_DECIMAL_POINT not allowed
+ array(self::ST_MANTISSA, self::IN_DECIMAL_POINT, self::ST_ERROR, $wrongNumberErrorAction),
+
+ array(self::ST_MANTISSA, self::IN_ASCII_DIGIT, self::ST_MANTISSA),
+
+ // IN_CHAR not allowed
+ array(self::ST_MANTISSA, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
+ ));
+
+
+ /** Actions */
+ $syntaxLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
+ $lexemeModifierAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
+ $addLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
+ $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
+ $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
+ $addLexemeCharAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
+
+
+ /** Syntax lexeme */
+ $this->addEntryAction(self::ST_SYNT_LEXEME, $syntaxLexemeAction);
+ // Two lexemes in succession
+ $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
+
+
+ /** Lexeme */
+ $this->addEntryAction(self::ST_LEXEME, $addLexemeCharAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
+ // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
+
+ // Leaving ST_LEXEME finishes the word collected so far
+ // NOTE(review): no rule above moves ST_LEXEME to ST_QUOTED_LEXEME, ST_NUMBER or
+ // ST_MANTISSA, so those three registrations look unused - confirm before removing
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE, $addLexemeAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME, $addLexemeAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER, $addLexemeAction);
+ $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA, $addLexemeAction);
+
+
+ /** Quoted lexeme */
+ // We don't need an entry action (the opening quote is skipped)
+ $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
+ $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
+ // The closing quote changes the state to ST_WHITE_SPACE; no other target state is used
+ $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE, $addQuotedLexemeAction);
+
+
+ /** Lexeme modifier */
+ $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
+
+
+ /** Number */
+ $this->addEntryAction(self::ST_NUMBER, $addLexemeCharAction);
+ $this->addEntryAction(self::ST_MANTISSA, $addLexemeCharAction);
+ $this->addTransitionAction(self::ST_NUMBER, self::ST_NUMBER, $addLexemeCharAction);
+ // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
+ $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
+
+ // Leaving ST_NUMBER / ST_MANTISSA for a non-error state finishes the number lexeme
+ $this->addTransitionAction(self::ST_NUMBER, self::ST_WHITE_SPACE, $addNumberLexemeAction);
+ $this->addTransitionAction(self::ST_NUMBER, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
+ $this->addTransitionAction(self::ST_NUMBER, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
+ $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE, $addNumberLexemeAction);
+ $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
+ $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
+ }
+
+
+
+
+    /**
+     * Map one query character to the state machine input symbol it stands for
+     *
+     * The character classes are tested in a fixed order; a character that
+     * matches none of them is reported as IN_CHAR.
+     *
+     * @param string $char
+     * @return integer
+     */
+    private function _translateInput($char)
+    {
+        if (strpos(self::QUERY_WHITE_SPACE_CHARS, $char) !== false) {
+            return self::IN_WHITE_SPACE;
+        }
+        if (strpos(self::QUERY_SYNT_CHARS, $char) !== false) {
+            return self::IN_SYNT_CHAR;
+        }
+        if (strpos(self::QUERY_MUTABLE_CHARS, $char) !== false) {
+            return self::IN_MUTABLE_CHAR;
+        }
+        if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) {
+            return self::IN_LEXEME_MODIFIER;
+        }
+        if (strpos(self::QUERY_ASCIIDIGITS_CHARS, $char) !== false) {
+            return self::IN_ASCII_DIGIT;
+        }
+
+        // Single characters that form a class of their own
+        if ($char === '"') {
+            return self::IN_QUOTE;
+        }
+        if ($char === '.') {
+            return self::IN_DECIMAL_POINT;
+        }
+        if ($char === '\\') {
+            return self::IN_ESCAPE_CHAR;
+        }
+
+        return self::IN_CHAR;
+    }
+
+
+    /**
+     * Split a query string into lexemes
+     *
+     * The string is cut into characters with iconv (so multi-byte encodings
+     * are handled per character), each character is translated to an input
+     * symbol and fed to the state machine, and one extra white space symbol
+     * is processed at the end so that a pending lexeme gets finished.
+     *
+     * @param string $inputString
+     * @param string $encoding
+     * @return array  lexemes collected by the action methods
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function tokenize($inputString, $encoding)
+    {
+        $this->reset();
+
+        $this->_lexemes     = array();
+        $this->_queryString = array();
+
+        // A previous call that ended with an exception leaves its partly
+        // recognized lexeme behind; without this reset those characters
+        // would be prepended to the first lexeme of the current query.
+        $this->_currentLexeme = '';
+
+        $strLength = iconv_strlen($inputString, $encoding);
+
+        // Workaround for iconv_substr bug
+        $inputString .= ' ';
+
+        for ($count = 0; $count < $strLength; $count++) {
+            $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
+        }
+
+        // The position is an object property because action methods read it
+        // and addQuerySyntaxLexeme() advances it for two-character lexemes.
+        for ($this->_queryStringPosition = 0;
+             $this->_queryStringPosition < count($this->_queryString);
+             $this->_queryStringPosition++) {
+            $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
+        }
+
+        // Trailing white space flushes whatever lexeme is still being collected
+        $this->process(self::IN_WHITE_SPACE);
+
+        if ($this->getState() != self::ST_WHITE_SPACE) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
+        }
+
+        $this->_queryString = null;
+
+        return $this->_lexemes;
+    }
+
+
+
+ /*********************************************************************
+ * Actions implementation
+ *
+ * Actions modify the list of recognized lexemes
+ *********************************************************************/
+
+    /**
+     * Add query syntax lexeme
+     *
+     * Handles the character at the current position. '|' and '&' must be
+     * doubled and are stored as one two-character lexeme. A ':' is not stored
+     * itself; instead the preceding word lexeme is re-typed as a field name.
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function addQuerySyntaxLexeme()
+    {
+        $lexeme = $this->_queryString[$this->_queryStringPosition];
+
+        $isDoubleCharLexeme = strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false;
+        if ($isDoubleCharLexeme) {
+            // The second character is consumed here, so advance the query position onto it
+            $this->_queryStringPosition++;
+
+            $endReached = ($this->_queryStringPosition == count($this->_queryString));
+            if ($endReached || $this->_queryString[$this->_queryStringPosition] != $lexeme) {
+                throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
+            }
+
+            // Both characters are equal, so doubling the first one yields the lexeme
+            $lexeme .= $lexeme;
+        }
+
+        $syntaxToken = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
+            $lexeme,
+            $this->_queryStringPosition
+        );
+
+        if ($syntaxToken->type != Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
+            $this->_lexemes[] = $syntaxToken;
+            return;
+        }
+
+        // Field indicator ':' is dropped; the previous lexeme becomes a 'field' instead of a 'word'
+        $fieldToken = array_pop($this->_lexemes);
+        if ($fieldToken === null || $fieldToken->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
+        }
+
+        $fieldToken->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
+        $this->_lexemes[] = $fieldToken;
+    }
+
+    /**
+     * Add lexeme modifier
+     *
+     * Stores the character at the current position as a syntax element token.
+     */
+    public function addLexemeModifier()
+    {
+        $position = $this->_queryStringPosition;
+
+        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
+            $this->_queryString[$position],
+            $position
+        );
+    }
+
+
+    /**
+     * Add lexeme
+     *
+     * Stores the collected characters as a word token positioned one character
+     * before the current one, then empties the collection buffer.
+     */
+    public function addLexeme()
+    {
+        $wordToken = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_WORD,
+            $this->_currentLexeme,
+            $this->_queryStringPosition - 1
+        );
+
+        $this->_lexemes[]     = $wordToken;
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Add quoted lexeme
+     *
+     * Stores the collected characters as a phrase token at the current
+     * position, then empties the collection buffer.
+     */
+    public function addQuotedLexeme()
+    {
+        $phraseToken = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
+            $this->_currentLexeme,
+            $this->_queryStringPosition
+        );
+
+        $this->_lexemes[]     = $phraseToken;
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Add number lexeme
+     *
+     * Stores the collected characters as a number token positioned one
+     * character before the current one, then empties the collection buffer.
+     */
+    public function addNumberLexeme()
+    {
+        $numberToken = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
+            $this->_currentLexeme,
+            $this->_queryStringPosition - 1
+        );
+
+        $this->_lexemes[]     = $numberToken;
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Extend lexeme by one char
+     *
+     * Appends the character at the current query position to the buffer.
+     */
+    public function addLexemeChar()
+    {
+        $char = $this->_queryString[$this->_queryStringPosition];
+
+        $this->_currentLexeme .= $char;
+    }
+
+
+    /**
+     * Position message
+     *
+     * Builds the "Position is N." suffix appended to lexer error messages.
+     *
+     * @return string
+     */
+    private function _positionMsg()
+    {
+        return "Position is {$this->_queryStringPosition}.";
+    }
+
+
+ /*********************************************************************
+ * Syntax errors actions
+ *********************************************************************/
+    /**
+     * Error action: a lexeme modifier is followed by a character that is not allowed after it
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function lexModifierErrException()
+    {
+        $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
+                 . $this->_positionMsg();
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+    /**
+     * Error action: an unescaped quote appears inside a lexeme
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function quoteWithinLexemeErrException()
+    {
+        $message = 'Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg();
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+    /**
+     * Error action: a number following a lexeme modifier is malformed
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function wrongNumberErrException()
+    {
+        // Space after the sentence keeps it apart from the position suffix,
+        // as in the other lexer error messages.
+        throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
+    }
+}
+
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
-/** Zend_Search_Lucene_Search_QueryTokenizer */
-require_once 'Zend/Search/Lucene/Search/QueryTokenizer.php';
-
/** Zend_Search_Lucene_Index_Term */
-require_once 'Zend/Search/Lucene/Index/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Search_Query_Term */
-require_once 'Zend/Search/Lucene/Search/Query/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Search_Query_MultiTerm */
-require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+
+/** Zend_Search_Lucene_Search_Query_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
-require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+
+/** Zend_Search_Lucene_Search_Query_Empty */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
+
+
+/** Zend_Search_Lucene_Search_QueryLexer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
+
+/** Zend_Search_Lucene_Search_QueryParserContext */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
-class Zend_Search_Lucene_Search_QueryParser
+class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
{
+ /**
+ * Parser instance
+ *
+ * @var Zend_Search_Lucene_Search_QueryParser
+ */
+ private static $_instance = null;
+
/**
- * Parses a query string, returning a Zend_Search_Lucene_Search_Query
+ * Query lexer
+ *
+ * @var Zend_Search_Lucene_Search_QueryLexer
+ */
+ private $_lexer;
+
+ /**
+ * Tokens list
+ * Array of Zend_Search_Lucene_Search_QueryToken objects
+ *
+ * @var array
+ */
+ private $_tokens;
+
+ /**
+ * Current token
+ *
+ * @var Zend_Search_Lucene_Search_QueryToken
+ */
+ private $_currentToken;
+
+ /**
+ * Last token
+ *
+ * It can be processed within FSM states, but this additional state simplifies FSM
+ *
+ * @var Zend_Search_Lucene_Search_QueryToken
+ */
+ private $_lastToken = null;
+
+ /**
+ * Range query first term
+ *
+ * @var string
+ */
+ private $_rqFirstTerm = null;
+
+ /**
+ * Current query parser context
+ *
+ * @var Zend_Search_Lucene_Search_QueryParserContext
+ */
+ private $_context;
+
+ /**
+ * Context stack
+ *
+ * @var array
+ */
+ private $_contextStack;
+
+ /**
+ * Query string encoding
+ *
+ * @var string
+ */
+ private $_encoding;
+
+ /**
+ * Query string default encoding
+ *
+ * @var string
+ */
+ private $_defaultEncoding = '';
+
+
+ /**
+ * Boolean operators constants
+ */
+ const B_OR = 0;
+ const B_AND = 1;
+
+ /**
+ * Default boolean queries operator
+ *
+ * @var integer
+ */
+ private $_defaultOperator = self::B_OR;
+
+
+ /** Query parser State Machine states */
+ const ST_COMMON_QUERY_ELEMENT = 0; // Terms, phrases, operators
+ const ST_CLOSEDINT_RQ_START = 1; // Range query start (closed interval) - '['
+ const ST_CLOSEDINT_RQ_FIRST_TERM = 2; // First term in '[term1 to term2]' construction
+ const ST_CLOSEDINT_RQ_TO_TERM = 3; // 'TO' lexeme in '[term1 to term2]' construction
+ const ST_CLOSEDINT_RQ_LAST_TERM = 4; // Second term in '[term1 to term2]' construction
+ const ST_CLOSEDINT_RQ_END = 5; // Range query end (closed interval) - ']'
+ const ST_OPENEDINT_RQ_START = 6; // Range query start (opened interval) - '{'
+ const ST_OPENEDINT_RQ_FIRST_TERM = 7; // First term in '{term1 to term2}' construction
+ const ST_OPENEDINT_RQ_TO_TERM = 8; // 'TO' lexeme in '{term1 to term2}' construction
+ const ST_OPENEDINT_RQ_LAST_TERM = 9; // Second term in '{term1 to term2}' construction
+ const ST_OPENEDINT_RQ_END = 10; // Range query end (opened interval) - '}'
+
+ /**
+ * Parser constructor
+ *
+ * Passes the parser states and the token types returned by
+ * Zend_Search_Lucene_Search_QueryToken::getTypes() to the FSM constructor,
+ * registers the transition rules, binds the action methods of this object
+ * and creates the query lexer.
+ */
+ public function __construct()
+ {
+ parent::__construct(array(self::ST_COMMON_QUERY_ELEMENT,
+ self::ST_CLOSEDINT_RQ_START,
+ self::ST_CLOSEDINT_RQ_FIRST_TERM,
+ self::ST_CLOSEDINT_RQ_TO_TERM,
+ self::ST_CLOSEDINT_RQ_LAST_TERM,
+ self::ST_CLOSEDINT_RQ_END,
+ self::ST_OPENEDINT_RQ_START,
+ self::ST_OPENEDINT_RQ_FIRST_TERM,
+ self::ST_OPENEDINT_RQ_TO_TERM,
+ self::ST_OPENEDINT_RQ_LAST_TERM,
+ self::ST_OPENEDINT_RQ_END
+ ),
+ Zend_Search_Lucene_Search_QueryToken::getTypes());
+
+ // Rules for ordinary query elements: all of them stay in the common state,
+ // except '[' and '{', which switch to the range query start states.
+ $this->addRules(
+ array(array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_WORD, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PHRASE, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FIELD, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START, self::ST_CLOSEDINT_RQ_START),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START, self::ST_OPENEDINT_RQ_START),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME, self::ST_COMMON_QUERY_ELEMENT),
+ array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NUMBER, self::ST_COMMON_QUERY_ELEMENT)
+ ));
+ // Closed interval range query: '[' word TO word ']'.
+ // The closing bracket returns straight to the common state; no rule here leads to ST_CLOSEDINT_RQ_END.
+ $this->addRules(
+ array(array(self::ST_CLOSEDINT_RQ_START, Zend_Search_Lucene_Search_QueryToken::TT_WORD, self::ST_CLOSEDINT_RQ_FIRST_TERM),
+ array(self::ST_CLOSEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME, self::ST_CLOSEDINT_RQ_TO_TERM),
+ array(self::ST_CLOSEDINT_RQ_TO_TERM, Zend_Search_Lucene_Search_QueryToken::TT_WORD, self::ST_CLOSEDINT_RQ_LAST_TERM),
+ array(self::ST_CLOSEDINT_RQ_LAST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END, self::ST_COMMON_QUERY_ELEMENT)
+ ));
+ // Opened interval range query: '{' word TO word '}'.
+ // The closing brace returns straight to the common state; no rule here leads to ST_OPENEDINT_RQ_END.
+ $this->addRules(
+ array(array(self::ST_OPENEDINT_RQ_START, Zend_Search_Lucene_Search_QueryToken::TT_WORD, self::ST_OPENEDINT_RQ_FIRST_TERM),
+ array(self::ST_OPENEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME, self::ST_OPENEDINT_RQ_TO_TERM),
+ array(self::ST_OPENEDINT_RQ_TO_TERM, Zend_Search_Lucene_Search_QueryToken::TT_WORD, self::ST_OPENEDINT_RQ_LAST_TERM),
+ array(self::ST_OPENEDINT_RQ_LAST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END, self::ST_COMMON_QUERY_ELEMENT)
+ ));
+
+
+
+ // Action objects, each bound to a public method of this parser by name
+ $addTermEntryAction = new Zend_Search_Lucene_FSMAction($this, 'addTermEntry');
+ $addPhraseEntryAction = new Zend_Search_Lucene_FSMAction($this, 'addPhraseEntry');
+ $setFieldAction = new Zend_Search_Lucene_FSMAction($this, 'setField');
+ $setSignAction = new Zend_Search_Lucene_FSMAction($this, 'setSign');
+ $setFuzzyProxAction = new Zend_Search_Lucene_FSMAction($this, 'processFuzzyProximityModifier');
+ $processModifierParameterAction = new Zend_Search_Lucene_FSMAction($this, 'processModifierParameter');
+ $subqueryStartAction = new Zend_Search_Lucene_FSMAction($this, 'subqueryStart');
+ $subqueryEndAction = new Zend_Search_Lucene_FSMAction($this, 'subqueryEnd');
+ $logicalOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'logicalOperator');
+ $openedRQFirstTermAction = new Zend_Search_Lucene_FSMAction($this, 'openedRQFirstTerm');
+ $openedRQLastTermAction = new Zend_Search_Lucene_FSMAction($this, 'openedRQLastTerm');
+ $closedRQFirstTermAction = new Zend_Search_Lucene_FSMAction($this, 'closedRQFirstTerm');
+ $closedRQLastTermAction = new Zend_Search_Lucene_FSMAction($this, 'closedRQLastTerm');
+
+
+ // Input actions for the common state. TT_BOOSTING_MARK has a rule above but no action here:
+ // processModifierParameter() checks the last token type when the following TT_NUMBER arrives.
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_WORD, $addTermEntryAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PHRASE, $addPhraseEntryAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FIELD, $setFieldAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED, $setSignAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED, $setSignAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, $setFuzzyProxAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NUMBER, $processModifierParameterAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START, $subqueryStartAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END, $subqueryEndAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME, $logicalOperatorAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME, $logicalOperatorAction);
+ $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME, $logicalOperatorAction);
+
+ // Actions registered with addEntryAction() for the range query term states
+ $this->addEntryAction(self::ST_OPENEDINT_RQ_FIRST_TERM, $openedRQFirstTermAction);
+ $this->addEntryAction(self::ST_OPENEDINT_RQ_LAST_TERM, $openedRQLastTermAction);
+ $this->addEntryAction(self::ST_CLOSEDINT_RQ_FIRST_TERM, $closedRQFirstTermAction);
+ $this->addEntryAction(self::ST_CLOSEDINT_RQ_LAST_TERM, $closedRQLastTermAction);
+
+
+
+ $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
+ }
+
+
+    /**
+     * Store the encoding used by parse() when no encoding is passed explicitly.
+     *
+     * The value is kept on the shared parser instance, which is created on first use.
+     *
+     * @param string $encoding
+     */
+    public static function setDefaultEncoding($encoding)
+    {
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+        $parser->_defaultEncoding = $encoding;
+    }
+
+    /**
+     * Return the encoding used by parse() when no encoding is passed explicitly.
+     *
+     * The shared parser instance is created on first use.
+     *
+     * @return string
+     */
+    public static function getDefaultEncoding()
+    {
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+
+        return $parser->_defaultEncoding;
+    }
+
+    /**
+     * Store the default boolean operator (self::B_OR or self::B_AND).
+     *
+     * The value is kept on the shared parser instance, which is created on first use.
+     *
+     * @param integer $operator
+     */
+    public static function setDefaultOperator($operator)
+    {
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+        $parser->_defaultOperator = $operator;
+    }
+
+    /**
+     * Return the default boolean operator (self::B_OR unless changed).
+     *
+     * The shared parser instance is created on first use.
+     *
+     * @return integer
+     */
+    public static function getDefaultOperator()
+    {
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+
+        return $parser->_defaultOperator;
+    }
+
+ /**
+ * Parses a query string
+ *
+ * Tokenizes the string with the query lexer, feeds the type of every token
+ * to the parser state machine and returns the query built by the root
+ * parsing context. A string that produces no tokens yields
+ * Zend_Search_Lucene_Search_Query_Empty.
*
* @param string $strQuery
+ * @param string $encoding query string encoding; the default encoding is used when null
* @return Zend_Search_Lucene_Search_Query
+ * @throws Zend_Search_Lucene_Search_QueryParserException
*/
- static public function parse($strQuery)
+ public static function parse($strQuery, $encoding = null)
{
- $tokens = new Zend_Search_Lucene_Search_QueryTokenizer($strQuery);
+ // Create the shared parser instance on first use
+ if (self::$_instance === null) {
+ self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
+ }
+
+ // Reset the per-query state kept on the shared instance.
+ // NOTE(review): the FSM state itself is not reset here; if an earlier parse() aborted inside a
+ // range query state, the next call presumably starts from that state - confirm against Zend_Search_Lucene_FSM.
+ self::$_instance->_encoding = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
+ self::$_instance->_lastToken = null;
+ self::$_instance->_context = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
+ self::$_instance->_contextStack = array();
+ self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
// Empty query
- if (!$tokens->count()) {
- throw new Zend_Search_Lucene_Exception('Syntax error: query string cannot be empty.');
+ if (count(self::$_instance->_tokens) == 0) {
+ return new Zend_Search_Lucene_Search_Query_Empty();
}
- // Term query
- if ($tokens->count() == 1) {
- if ($tokens->current()->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
- return new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($tokens->current()->text, 'contents'));
- } else {
- throw new Zend_Search_Lucene_Exception('Syntax error: query string must contain at least one word.');
- }
- }
+ // Feed each token type to the state machine; the action methods read the token through _currentToken
+ foreach (self::$_instance->_tokens as $token) {
+ try {
+ self::$_instance->_currentToken = $token;
+ self::$_instance->process($token->type);
+
+ self::$_instance->_lastToken = $token;
+ } catch (Exception $e) {
+ // A message containing this text (presumably raised by the FSM when no rule matches)
+ // is reported as a syntax error at the token position; anything else is rethrown as is.
+ if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
+ throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+ }
- /**
- * MultiTerm Query
- *
- * Process each token that was returned by the tokenizer.
- */
- $terms = array();
- $signs = array();
- $prevToken = null;
- $openBrackets = 0;
- $field = 'contents';
- foreach ($tokens as $token) {
- switch ($token->type) {
- case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD:
- $terms[] = new Zend_Search_Lucene_Index_Term($token->text, $field);
- $field = 'contents';
- if ($prevToken !== null &&
- $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
- if ($prevToken->text == "+") {
- $signs[] = true;
- } else {
- $signs[] = false;
- }
- } else {
- $signs[] = null;
- }
- break;
- case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN:
- if ($prevToken !== null &&
- $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
- throw new Zend_Search_Lucene_Exception('Syntax error: sign operator must be followed by a word.');
- }
- break;
- case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD:
- $field = $token->text;
- // let previous token to be signed as next $prevToken
- $token = $prevToken;
- break;
- case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET:
- $token->text=='(' ? $openBrackets++ : $openBrackets--;
+ throw $e;
}
- $prevToken = $token;
}
- // Finish up parsing: check the last token in the query for an opening sign or parenthesis.
- if ($prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
- throw new Zend_Search_Lucene_Exception('Syntax Error: sign operator must be followed by a word.');
+ // subqueryStart() pushes a context for every '(' and subqueryEnd() pops one for every ')',
+ // so a non-empty stack here means a subquery was never closed
+ if (count(self::$_instance->_contextStack) != 0) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
}
- // Finish up parsing: check that every opening bracket has a matching closing bracket.
- if ($openBrackets != 0) {
- throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, every opening must have closing.');
+ return self::$_instance->_context->getQuery();
+ }
+
+
+ /*********************************************************************
+ * Actions implementation
+ *
+ * Actions affect on recognized lexemes list
+ *********************************************************************/
+
+    /**
+     * Action: wrap the current token text into a term entry and add it to the current context.
+     */
+    public function addTermEntry()
+    {
+        $text  = $this->_currentToken->text;
+        $field = $this->_context->getField();
+
+        $this->_context->addEntry(new Zend_Search_Lucene_Search_QueryEntry_Term($text, $field));
+    }
+
+    /**
+     * Action: wrap the current token text into a phrase entry and add it to the current context.
+     */
+    public function addPhraseEntry()
+    {
+        $text  = $this->_currentToken->text;
+        $field = $this->_context->getField();
+
+        $this->_context->addEntry(new Zend_Search_Lucene_Search_QueryEntry_Phrase($text, $field));
+    }
+
+    /**
+     * Action: use the current token text as the field of the next entry.
+     */
+    public function setField()
+    {
+        $fieldName = $this->_currentToken->text;
+
+        $this->_context->setNextEntryField($fieldName);
+    }
+
+    /**
+     * Action: pass the type of the current sign token to the context as the sign of the next entry.
+     */
+    public function setSign()
+    {
+        $signType = $this->_currentToken->type;
+
+        $this->_context->setNextEntrySign($signType);
+    }
+
+
+    /**
+     * Action for the '~' token: apply the fuzzy search/proximity modifier, without a parameter,
+     * through the current context.
+     */
+    public function processFuzzyProximityModifier()
+    {
+        $context = $this->_context;
+
+        $context->processFuzzyProximityModifier();
+    }
+
+ /**
+ * Process modifier parameter
+ *
+ * Action for a number token. The number is passed to the current context as the
+ * parameter of the modifier given by the previous token: '~' (fuzzy/proximity)
+ * or '^' (boost).
+ *
+ * @throws Zend_Search_Lucene_Search_QueryParserException if the number is the first token
+ * @throws Zend_Search_Lucene_Exception if the previous token is not a modifier mark
+ */
+ public function processModifierParameter()
+ {
+ if ($this->_lastToken === null) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
- switch (count($terms)) {
- case 0:
- throw new Zend_Search_Lucene_Exception('Syntax error: bad term count.');
- case 1:
- return new Zend_Search_Lucene_Search_Query_Term($terms[0],$signs[0] !== false);
+ switch ($this->_lastToken->type) {
+ case Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK:
+ $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
+ break;
+
+ case Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK:
+ $this->_context->boost($this->_currentToken->text);
+ break;
+
default:
- return new Zend_Search_Lucene_Search_Query_MultiTerm($terms,$signs);
+ // It's not a user input exception
+ // Report the position of the offending number token instead of leaving it blank
+ throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position ' . $this->_currentToken->position . '.' );
+ }
+ }
+
+
+    /**
+     * Action for '(': push the current context onto the stack and continue in a fresh context
+     * created with the same encoding and the field currently in effect.
+     */
+    public function subqueryStart()
+    {
+        $outerContext = $this->_context;
+
+        $this->_contextStack[] = $outerContext;
+        $this->_context = new Zend_Search_Lucene_Search_QueryParserContext(
+            $this->_encoding,
+            $outerContext->getField()
+        );
+    }
+
+    /**
+     * Action for ')': build the query of the finished subquery context, restore the enclosing
+     * context from the stack and add the subquery to it as an entry.
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException if there is no enclosing context to return to
+     */
+    public function subqueryEnd()
+    {
+        if (count($this->_contextStack) < 1) {
+            $message = 'Syntax Error: mismatched parentheses, every opening must have closing. Char position '
+                     . $this->_currentToken->position . '.';
+
+            throw new Zend_Search_Lucene_Search_QueryParserException($message);
+        }
+
+        // The subquery must be generated before the enclosing context is restored.
+        $subquery = $this->_context->getQuery();
+        $this->_context = array_pop($this->_contextStack);
+
+        $entry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery);
+        $this->_context->addEntry($entry);
+    }
+
+    /**
+     * Action for AND/OR/NOT lexemes: record the operator token type in the current context.
+     */
+    public function logicalOperator()
+    {
+        $operatorType = $this->_currentToken->type;
+
+        $this->_context->addLogicalOperator($operatorType);
+    }
+
+    /**
+     * Entry action: remember the text of the first term of an opened interval range query.
+     */
+    public function openedRQFirstTerm()
+    {
+        $token = $this->_currentToken;
+
+        $this->_rqFirstTerm = $token->text;
+    }
+
+ /**
+ * Process last range query term (opened interval)
+ *
+ * Range queries are not implemented: this action unconditionally rejects the query.
+ *
+ * @throws Zend_Search_Lucene_Search_QueryParserException always
+ */
+ public function openedRQLastTerm()
+ {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Range queries are not supported yet.');
+
+ // Disabled draft of the intended implementation (unreachable after the throw above):
+ // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm, $this->_context->getField());
+ // $lastTerm = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+
+ // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, false);
+ // $this->_context->addentry($query);
}
+    /**
+     * Entry action: remember the text of the first term of a closed interval range query.
+     */
+    public function closedRQFirstTerm()
+    {
+        $token = $this->_currentToken;
+
+        $this->_rqFirstTerm = $token->text;
+    }
+
+    /**
+     * Entry action for the last term of a closed interval range query.
+     *
+     * Range queries are not implemented: this action unconditionally rejects the query.
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException always
+     */
+    public function closedRQLastTerm()
+    {
+        // Intended implementation, still disabled: build Zend_Search_Lucene_Index_Term objects from
+        // $this->_rqFirstTerm and the current token text (both with the context field), wrap them in
+        // Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, true) and add it to the context.
+        $message = 'Range queries are not supported yet.';
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Search_Query_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
+
+/** Zend_Search_Lucene_Search_Query_MultiTerm */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+
+/** Zend_Search_Lucene_Search_Query_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
+
+/** Zend_Search_Lucene_Search_Query_Phrase */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryParserContext
+{
+ /**
+ * Default field for the context.
+ *
+ * null means, that term should be searched through all fields
+ * Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries to several
+ *
+ * @var string|null
+ */
+ private $_defaultField;
+
+ /**
+ * Field specified for next entry
+ *
+ * @var string
+ */
+ private $_nextEntryField = null;
+
+ /**
+ * True means, that term is required.
+ * False means, that term is prohibited.
+ * null means, that term is neither prohibited, nor required
+ *
+ * @var boolean|null
+ */
+ private $_nextEntrySign = null;
+
+
+ /**
+ * Entries grouping mode
+ */
+ const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
+ const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
+
+ /**
+ * Grouping mode
+ *
+ * @var integer
+ */
+ private $_mode = null;
+
+ /**
+ * Entries signs.
+ * Used in GM_SIGNS grouping mode
+ *
+ * @var array
+ */
+ private $_signs = array();
+
+ /**
+ * Query entries
+ * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
+ * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
+ *
+ * @var array
+ */
+ private $_entries = array();
+
+ /**
+ * Query string encoding
+ *
+ * @var string
+ */
+ private $_encoding;
+
+
+    /**
+     * Create a parsing context.
+     *
+     * @param string      $encoding     query string encoding, passed on when entry queries are generated
+     * @param string|null $defaultField field used for entries that have no explicit field
+     */
+    public function __construct($encoding, $defaultField = null)
+    {
+        $this->_defaultField = $defaultField;
+        $this->_encoding     = $encoding;
+    }
+
+
+    /**
+     * Get the field in effect for the next entry.
+     *
+     * The field set through setNextEntryField() wins; otherwise the context default field is returned.
+     *
+     * @return string|null
+     */
+    public function getField()
+    {
+        if ($this->_nextEntryField === null) {
+            return $this->_defaultField;
+        }
+
+        return $this->_nextEntryField;
+    }
+
+    /**
+     * Remember the field to use for the next entry; addEntry() clears it again.
+     *
+     * @param string $field
+     */
+    public function setNextEntryField($field)
+    {
+        $nextField = $field;
+
+        $this->_nextEntryField = $nextField;
+    }
+
+
+    /**
+     * Set the sign of the next entry and switch the context to 'signs' grouping mode.
+     *
+     * @param integer $sign Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED or TT_PROHIBITED
+     * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in boolean mode
+     * @throws Zend_Search_Lucene_Exception if the sign type is not recognized
+     */
+    public function setNextEntrySign($sign)
+    {
+        $alreadyBoolean = ($this->_mode === self::GM_BOOLEAN);
+        if ($alreadyBoolean) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
+        }
+
+        $this->_mode = self::GM_SIGNS;
+
+        switch ($sign) {
+            case Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED:
+                $this->_nextEntrySign = true;
+                break;
+
+            case Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED:
+                $this->_nextEntrySign = false;
+                break;
+
+            default:
+                throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
+        }
+    }
+
+
+    /**
+     * Append an entry to the context.
+     *
+     * Outside boolean mode the pending sign is recorded alongside the entry.
+     * The pending field and sign are cleared afterwards in every mode.
+     *
+     * @param Zend_Search_Lucene_Search_QueryEntry $entry
+     */
+    public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
+    {
+        $booleanMode = ($this->_mode === self::GM_BOOLEAN);
+
+        if (!$booleanMode) {
+            $this->_signs[] = $this->_nextEntrySign;
+        }
+        $this->_entries[] = $entry;
+
+        // The field and the sign apply to a single entry only.
+        $this->_nextEntrySign  = null;
+        $this->_nextEntryField = null;
+    }
+
+
+    /**
+     * Apply the '~' (fuzzy search / proximity search) modifier to the most recently added entry.
+     *
+     * @param mixed $parameter modifier parameter, or null when none was given
+     * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not directly follow an entry
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $message = '\'~\' modifier must follow word or phrase.';
+
+        // A pending field or sign means something other than an entry precedes the modifier.
+        $nothingPending = ($this->_nextEntryField === null && $this->_nextEntrySign === null);
+        if (!$nothingPending) {
+            throw new Zend_Search_Lucene_Search_QueryParserException($message);
+        }
+
+        // Take the last entry off the list; it is put back once the modifier has been applied.
+        $target = array_pop($this->_entries);
+        if (!($target instanceof Zend_Search_Lucene_Search_QueryEntry)) {
+            // The list was empty or ended with a boolean operator.
+            throw new Zend_Search_Lucene_Search_QueryParserException($message);
+        }
+
+        $target->processFuzzyProximityModifier($parameter);
+        $this->_entries[] = $target;
+    }
+
+    /**
+     * Apply the '^' (boost) modifier to the most recently added entry.
+     *
+     * @param float $boostFactor
+     * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not directly follow an entry
+     */
+    public function boost($boostFactor)
+    {
+        $message = '\'^\' modifier must follow word, phrase or subquery.';
+
+        // A pending field or sign means something other than an entry precedes the modifier.
+        $nothingPending = ($this->_nextEntryField === null && $this->_nextEntrySign === null);
+        if (!$nothingPending) {
+            throw new Zend_Search_Lucene_Search_QueryParserException($message);
+        }
+
+        // Take the last entry off the list; it is put back once the boost has been applied.
+        $target = array_pop($this->_entries);
+        if (!($target instanceof Zend_Search_Lucene_Search_QueryEntry)) {
+            // The list was empty or ended with a boolean operator.
+            throw new Zend_Search_Lucene_Search_QueryParserException($message);
+        }
+
+        $target->boost($boostFactor);
+        $this->_entries[] = $target;
+    }
+
+    /**
+     * Record a logical operator and switch the context to boolean grouping mode.
+     *
+     * @param integer $operator operator token type (a Zend_Search_Lucene_Search_QueryToken constant)
+     * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in 'signs' mode
+     */
+    public function addLogicalOperator($operator)
+    {
+        $alreadySigns = ($this->_mode === self::GM_SIGNS);
+        if ($alreadySigns) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
+        }
+
+        // Operators share the entries list with the query entries.
+        $this->_entries[] = $operator;
+        $this->_mode      = self::GM_BOOLEAN;
+    }
+
+
+    /**
+     * Generate a 'signs style' query from the context
+     * '+term1 term2 -term3 +(<subquery1>) ...'
+     *
+     * Entries without an explicit sign are required when the parser default operator
+     * is B_AND and optional otherwise.
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function _signStyleExpressionQuery()
+    {
+        $query = new Zend_Search_Lucene_Search_Query_Boolean();
+
+        $defaultOperator = Zend_Search_Lucene_Search_QueryParser::getDefaultOperator();
+        // true: required; null: optional (Zend_Search_Lucene_Search_QueryParser::B_OR)
+        $defaultSign = ($defaultOperator == Zend_Search_Lucene_Search_QueryParser::B_AND) ? true : null;
+
+        foreach ($this->_entries as $entryId => $entry) {
+            $sign = $this->_signs[$entryId];
+            if ($sign === null) {
+                $sign = $defaultSign;
+            }
+
+            $query->addSubquery($entry->getQuery($this->_encoding), $sign);
+        }
+
+        return $query;
+    }
+
+
+    /**
+     * Generate 'boolean style' query from the context
+     * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException if the entries do not form a valid boolean expression
+     * @throws Zend_Search_Lucene_Exception if the entries list contains an unknown operator type
+     */
+    private function _booleanExpressionQuery()
+    {
+        /**
+         * We treat each level of an expression as a boolean expression in
+         * a Disjunctive Normal Form
+         *
+         * AND operator has higher precedence than OR
+         *
+         * Thus logical query is a disjunction of one or more conjunctions of
+         * one or more query entries
+         */
+
+        $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
+
+        try {
+            foreach ($this->_entries as $entry) {
+                if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
+                    $expressionRecognizer->processLiteral($entry);
+                } else {
+                    // Anything that is not a query entry is an operator token type
+                    switch ($entry) {
+                        case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
+                            break;
+
+                        case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
+                            break;
+
+                        case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
+                            break;
+
+                        default:
+                            // Throw an exception class here: Zend_Search_Lucene is the index class and cannot be thrown.
+                            // NOTE(review): if Zend_Search_Lucene_Exception derives from Zend_Search_Exception, the
+                            // catch below turns this into the generic 'Boolean expression error.' - confirm.
+                            throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
+                    }
+                }
+            }
+
+            $conjunctions = $expressionRecognizer->finishExpression();
+        } catch (Zend_Search_Exception $e) {
+            // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
+            //                                                          $e->getMessage() . '\'.' );
+            // It's query syntax error message and it should be user friendly. So FSM message is omitted
+            throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
+        }
+
+        // Remove 'only negative' conjunctions.
+        // Each conjunction entry is used below as a pair: [0] is the query entry, [1] its sign.
+        foreach ($conjunctions as $conjunctionId => $conjunction) {
+            $nonNegativeEntryFound = false;
+
+            foreach ($conjunction as $conjunctionEntry) {
+                if ($conjunctionEntry[1]) {
+                    $nonNegativeEntryFound = true;
+                    break;
+                }
+            }
+
+            if (!$nonNegativeEntryFound) {
+                unset($conjunctions[$conjunctionId]);
+            }
+        }
+
+
+        $subqueries = array();
+        foreach ($conjunctions as $conjunction) {
+            // Check, if it's a one term conjunction
+            if (count($conjunction) == 1) {
+                $subqueries[] = $conjunction[0][0]->getQuery($this->_encoding);
+            } else {
+                $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
+
+                foreach ($conjunction as $conjunctionEntry) {
+                    $subquery->addSubquery($conjunctionEntry[0]->getQuery($this->_encoding), $conjunctionEntry[1]);
+                }
+
+                $subqueries[] = $subquery;
+            }
+        }
+
+        if (count($subqueries) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if (count($subqueries) == 1) {
+            return $subqueries[0];
+        }
+
+
+        $query = new Zend_Search_Lucene_Search_Query_Boolean();
+
+        foreach ($subqueries as $subquery) {
+            // Non-required entry/subquery
+            $query->addSubquery($subquery);
+        }
+
+        return $query;
+    }
+
+    /**
+     * Generate the query for this context.
+     *
+     * A context in boolean mode is built as a boolean expression; any other
+     * context (signs mode or no mode set) is built in 'signs style'.
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function getQuery()
+    {
+        if ($this->_mode !== self::GM_BOOLEAN) {
+            return $this->_signStyleExpressionQuery();
+        }
+
+        return $this->_booleanExpressionQuery();
+    }
+}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/**
+ * Zend_Search_Lucene base exception
+ */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * Exception thrown for errors in the query string itself.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ *
+ * Special exception type, which may be used to intercept wrong user input
+ * separately from other Zend_Search_Lucene_Exception errors. It adds no members
+ * to its parent class.
+ */
+class Zend_Search_Lucene_Search_QueryParserException extends Zend_Search_Lucene_Exception
+{}
+
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryToken
{
/**
- * Token type Word.
+ * Token types.
*/
- const TOKTYPE_WORD = 0;
+ const TT_WORD = 0; // Word
+ const TT_PHRASE = 1; // Phrase (one or several quoted words)
+ const TT_FIELD = 2; // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
+ const TT_FIELD_INDICATOR = 3; // ':'
+ const TT_REQUIRED = 4; // '+'
+ const TT_PROHIBITED = 5; // '-'
+ const TT_FUZZY_PROX_MARK = 6; // '~'
+ const TT_BOOSTING_MARK = 7; // '^'
+ const TT_RANGE_INCL_START = 8; // '['
+ const TT_RANGE_INCL_END = 9; // ']'
+ const TT_RANGE_EXCL_START = 10; // '{'
+ const TT_RANGE_EXCL_END = 11; // '}'
+ const TT_SUBQUERY_START = 12; // '('
+ const TT_SUBQUERY_END = 13; // ')'
+ const TT_AND_LEXEME = 14; // 'AND' or 'and'
+ const TT_OR_LEXEME = 15; // 'OR' or 'or'
+ const TT_NOT_LEXEME = 16; // 'NOT' or 'not'
+ const TT_TO_LEXEME = 17; // 'TO' or 'to'
+ const TT_NUMBER = 18; // Number, like: 10, 0.8, .64, ....
- /**
- * Token type Field.
- * Field indicator in 'field:word' pair
- */
- const TOKTYPE_FIELD = 1;
/**
- * Token type Sign.
- * '+' (required) or '-' (absentee) sign
+ * Returns all possible lexeme types.
+ * It's used for syntax analyzer state machine initialization
+ *
+ * @return array
*/
- const TOKTYPE_SIGN = 2;
+    public static function getTypes()
+    {
+        // One entry per TT_* lexeme type, in the order the constants are declared
+        $types = array(
+            self::TT_WORD,
+            self::TT_PHRASE,
+            self::TT_FIELD,
+            self::TT_FIELD_INDICATOR,
+            self::TT_REQUIRED,
+            self::TT_PROHIBITED,
+            self::TT_FUZZY_PROX_MARK,
+            self::TT_BOOSTING_MARK,
+            self::TT_RANGE_INCL_START,
+            self::TT_RANGE_INCL_END,
+            self::TT_RANGE_EXCL_START,
+            self::TT_RANGE_EXCL_END,
+            self::TT_SUBQUERY_START,
+            self::TT_SUBQUERY_END,
+            self::TT_AND_LEXEME,
+            self::TT_OR_LEXEME,
+            self::TT_NOT_LEXEME,
+            self::TT_TO_LEXEME,
+            self::TT_NUMBER
+        );
+
+        return $types;
+    }
+
/**
- * Token type Bracket.
- * '(' or ')'
+ * TokenCategories
*/
- const TOKTYPE_BRACKET = 3;
+ const TC_WORD = 0; // Word
+ const TC_PHRASE = 1; // Phrase (one or several quoted words)
+ const TC_NUMBER = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
+ const TC_SYNTAX_ELEMENT = 3; // + - ( ) [ ] { } ! || && ~ ^
/**
*/
public $text;
+ /**
+ * Token position within query.
+ *
+ * @var integer
+ */
+ public $position;
+
/**
* IndexReader constructor needs token type and token text as a parameters.
*
- * @param $tokType integer
- * @param $tokText string
+ * @param integer $tokenCategory
+ * @param string $tokenText
+ * @param integer $position
*/
- public function __construct($tokType, $tokText)
+    public function __construct($tokenCategory, $tokenText, $position)
{
- switch ($tokType) {
- case self::TOKTYPE_BRACKET:
- // fall through to the next case
- case self::TOKTYPE_FIELD:
- // fall through to the next case
- case self::TOKTYPE_SIGN:
- // fall through to the next case
- case self::TOKTYPE_WORD:
+        // Builds a token from a lexer category (TC_*), its text and its offset
+        // in the query, and derives the concrete lexeme type (TT_*) from them.
+        // Throws Zend_Search_Lucene_Exception for an unknown category or an
+        // unknown syntax element.
+        $this->text     = $tokenText;
+        $this->position = $position + 1; // Start from 1
+
+        switch ($tokenCategory) {
+            case self::TC_WORD:
+                // Operator words are recognised case-insensitively;
+                // lower-case the text once instead of once per comparison.
+                $lowerText = strtolower($tokenText);
+
+                if ($lowerText == 'and') {
+                    $this->type = self::TT_AND_LEXEME;
+                } else if ($lowerText == 'or') {
+                    $this->type = self::TT_OR_LEXEME;
+                } else if ($lowerText == 'not') {
+                    $this->type = self::TT_NOT_LEXEME;
+                } else if ($lowerText == 'to') {
+                    $this->type = self::TT_TO_LEXEME;
+                } else {
+                    $this->type = self::TT_WORD;
+                }
break;
- default:
- throw new Zend_Search_Lucene_Exception("Unrecognized token type \"$tokType\".");
- }
- if (!strlen($tokText)) {
- throw new Zend_Search_Lucene_Exception('Token text must be supplied.');
- }
+            case self::TC_PHRASE:
+                $this->type = self::TT_PHRASE;
+                break;
+
+            case self::TC_NUMBER:
+                $this->type = self::TT_NUMBER;
+                break;
+
+            case self::TC_SYNTAX_ELEMENT:
+                // Map each syntax element to its lexeme type; '!', '&&' and '||'
+                // share the types of the NOT/AND/OR words.
+                switch ($tokenText) {
+                    case ':':
+                        $this->type = self::TT_FIELD_INDICATOR;
+                        break;
- $this->type = $tokType;
- $this->text = $tokText;
+                    case '+':
+                        $this->type = self::TT_REQUIRED;
+                        break;
+
+                    case '-':
+                        $this->type = self::TT_PROHIBITED;
+                        break;
+
+                    case '~':
+                        $this->type = self::TT_FUZZY_PROX_MARK;
+                        break;
+
+                    case '^':
+                        $this->type = self::TT_BOOSTING_MARK;
+                        break;
+
+                    case '[':
+                        $this->type = self::TT_RANGE_INCL_START;
+                        break;
+
+                    case ']':
+                        $this->type = self::TT_RANGE_INCL_END;
+                        break;
+
+                    case '{':
+                        $this->type = self::TT_RANGE_EXCL_START;
+                        break;
+
+                    case '}':
+                        $this->type = self::TT_RANGE_EXCL_END;
+                        break;
+
+                    case '(':
+                        $this->type = self::TT_SUBQUERY_START;
+                        break;
+
+                    case ')':
+                        $this->type = self::TT_SUBQUERY_END;
+                        break;
+
+                    case '!':
+                        $this->type = self::TT_NOT_LEXEME;
+                        break;
+
+                    case '&&':
+                        $this->type = self::TT_AND_LEXEME;
+                        break;
+
+                    case '||':
+                        $this->type = self::TT_OR_LEXEME;
+                        break;
+
+                    default:
+                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
+                }
+                break;
+
+            // The former second "case self::TC_NUMBER:" label was dropped here:
+            // the first TC_NUMBER case above always matches first, and the duplicate
+            // had no break, so it would have fallen through into the exception below.
+            default:
+                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
+        }
}
}
/** Zend_Search_Lucene_Search_QueryToken */
-require_once 'Zend/Search/Lucene/Search/QueryToken.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Similarity_Default */
-require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity/Default.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Similarity
*
* @var Zend_Search_Lucene_Search_Similarity
*/
- static private $_defaultImpl;
+ private static $_defaultImpl;
/**
* Cache of decoded bytes.
*
* @var array
*/
- static private $_normTable = array( 0 => 0.0,
+ private static $_normTable = array( 0 => 0.0,
1 => 5.820766E-10,
2 => 6.9849193E-10,
3 => 8.1490725E-10,
*
* @param Zend_Search_Lucene_Search_Similarity $similarity
*/
- static public function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
+ public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
{
self::$_defaultImpl = $similarity;
}
*
* @return Zend_Search_Lucene_Search_Similarity
*/
- static public function getDefault()
+ public static function getDefault()
{
if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
* @param integer $byte
* @return float
*/
- static public function decodeNorm($byte)
+ public static function decodeNorm($byte)
{
return self::$_normTable[$byte & 0xFF];
}
* @param integer $b
* @return float
*/
- static private function _floatToByte($f)
+ private static function _floatToByte($f)
{
// round negatives up to zero
if ($f <= 0.0) {
* Returns a score factor for the term
*
* @param mixed $input
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
* @return a score factor for the term
*/
- public function idf($input, $reader)
+ public function idf($input, Zend_Search_Lucene_Interface $reader)
{
if (!is_array($input)) {
return $this->idfFreq($reader->docFreq($input), $reader->count());
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
+/** Zend_Search_Lucene_Search_Similarity */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
+
+
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Weight
{
+ /**
+ * Normalization factor.
+ * This value is stored only for query explanation purposes and is not used anywhere else
+ *
+ * @var float
+ */
+ protected $_queryNorm;
+
+ /**
+ * Weight value
+ *
+ * Weight value may be initialized in sumOfSquaredWeights() or normalize()
+ * because they both are invoked either in Query::_initWeight (for top-level query) or
+ * in corresponding methods of parent query's weights
+ *
+ * @var float
+ */
+ protected $_value;
+
+
/**
* The weight for this query.
*
* @return float
*/
- abstract public function getValue();
+    public function getValue()
+    {
+        // Default implementation: hand back the stored weight value
+        $value = $this->_value;
+
+        return $value;
+    }
/**
* The sum of squared weights of contained query clauses.
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Boolean extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * Index reader handed to the constructor.
+     *
+     * @var Zend_Search_Lucene_Interface
+     */
+    private $_reader;
+
+    /**
+     * The query this weight was created for.
+     *
+     * @var Zend_Search_Lucene_Search_Query
+     */
+    private $_query;
+
+    /**
+     * Subquery weights (Zend_Search_Lucene_Search_Weight objects),
+     * keyed by subquery number.
+     *
+     * @var array
+     */
+    private $_weights;
+
+
+    /**
+     * Zend_Search_Lucene_Search_Weight_Boolean constructor.
+     *
+     * Creates a weight for every subquery of $query, except those whose
+     * sign is set and evaluates to false.
+     *
+     * @param Zend_Search_Lucene_Search_Query $query   the query that this concerns
+     * @param Zend_Search_Lucene_Interface    $reader  index reader
+     */
+    public function __construct(Zend_Search_Lucene_Search_Query $query,
+                                Zend_Search_Lucene_Interface    $reader)
+    {
+        $this->_query   = $query;
+        $this->_reader  = $reader;
+        $this->_weights = array();
+
+        $signs = $query->getSigns();
+
+        foreach ($query->getSubqueries() as $num => $subquery) {
+            // Skip only when a sign exists for this subquery and it is false
+            if ($signs !== null && $signs[$num] !== null && !$signs[$num]) {
+                continue;
+            }
+
+            $this->_weights[$num] = $subquery->createWeight($reader);
+        }
+    }
+
+
+    /**
+     * The weight for this query.
+     * The inherited Weight::$_value is not used here; the query boost is returned.
+     *
+     * @return float
+     */
+    public function getValue()
+    {
+        return $this->_query->getBoost();
+    }
+
+
+    /**
+     * The sum of squared weights of contained query clauses,
+     * multiplied by the squared boost of the query.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        $total = 0;
+        foreach ($this->_weights as $subWeight) {
+            // accumulate the sub weights
+            $total += $subWeight->sumOfSquaredWeights();
+        }
+
+        // apply the query boost, squared
+        $total *= $this->_query->getBoost() * $this->_query->getBoost();
+
+        // a zero total (e.g. an empty query like '-something -another') is reported as 1.0
+        return ($total == 0) ? 1.0 : $total;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * The factor is multiplied by the query boost and then passed on
+     * to every subquery weight.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        // incorporate boost
+        $boostedNorm = $queryNorm * $this->_query->getBoost();
+
+        foreach ($this->_weights as $subWeight) {
+            $subWeight->normalize($boostedNorm);
+        }
+    }
+}
+
+
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * Weight with fixed behaviour: its sum of squared weights is always 1
+ * and normalization is a no-op.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Empty extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * The sum of squared weights of contained query clauses.
+     * There are no clauses to sum here; the constant 1 is returned.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        return 1;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     * The factor is ignored.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        // intentionally empty
+    }
+}
+
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Weight */
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Search_Weight
/**
* IndexReader.
*
- * @var Zend_Search_Lucene
+ * @var Zend_Search_Lucene_Interface
*/
private $_reader;
/**
* The query that this concerns.
*
- * @var Zend_Search_Lucene_Search_Query_MultiTerm
+ * @var Zend_Search_Lucene_Search_Query
*/
private $_query;
* query - the query that this concerns.
* reader - index reader
*
- * @param Zend_Search_Lucene_Search_Query_MultiTerm $query
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Search_Query $query
+ * @param Zend_Search_Lucene_Interface $reader
*/
- public function __construct($query, $reader)
+ public function __construct(Zend_Search_Lucene_Search_Query $query,
+ Zend_Search_Lucene_Interface $reader)
{
$this->_query = $query;
$this->_reader = $reader;
$signs = $query->getSigns();
- foreach ($query->getTerms() as $num => $term) {
- if ($signs === null || $signs[$num] === null || $signs[$num]) {
- $this->_weights[$num] = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
- $query->setWeight($num, $this->_weights[$num]);
+ foreach ($query->getTerms() as $id => $term) {
+ if ($signs === null || $signs[$id] === null || $signs[$id]) {
+ $this->_weights[$id] = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
+ $query->setWeight($id, $this->_weights[$id]);
}
}
}
/**
* The weight for this query
+ * Standard Weight::$_value is not used for boolean queries
*
* @return float
*/
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene_Search_Weight
*/
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_Weight
/**
* IndexReader.
*
- * @var Zend_Search_Lucene
+ * @var Zend_Search_Lucene_Interface
*/
private $_reader;
*/
private $_query;
- /**
- * Weight value
- *
- * @var float
- */
- private $_value;
-
/**
* Score factor
*
*/
private $_idf;
- /**
- * Normalization factor
- *
- * @var float
- */
- private $_queryNorm;
-
-
- /**
- * Query weight
- *
- * @var float
- */
- private $_queryWeight;
-
-
/**
* Zend_Search_Lucene_Search_Weight_Phrase constructor
*
* @param Zend_Search_Lucene_Search_Query_Phrase $query
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Interface $reader
*/
- public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query, Zend_Search_Lucene $reader)
+ public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query,
+ Zend_Search_Lucene_Interface $reader)
{
$this->_query = $query;
$this->_reader = $reader;
}
-
- /**
- * The weight for this query
- *
- * @return float
- */
- public function getValue()
- {
- return $this->_value;
- }
-
-
/**
* The sum of squared weights of contained query clauses.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Weight */
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
/**
* IndexReader.
*
- * @var Zend_Search_Lucene
+ * @var Zend_Search_Lucene_Interface
*/
private $_reader;
*/
private $_query;
- /**
- * Weight value
- *
- * @var float
- */
- private $_value;
-
/**
* Score factor
*
*/
private $_idf;
- /**
- * Normalization factor
- *
- * @var float
- */
- private $_queryNorm;
-
-
/**
* Query weight
*
* Zend_Search_Lucene_Search_Weight_Term constructor
* reader - index reader
*
- * @param Zend_Search_Lucene $reader
+ * @param Zend_Search_Lucene_Index_Term $term
+ * @param Zend_Search_Lucene_Search_Query $query
+ * @param Zend_Search_Lucene_Interface $reader
*/
- public function __construct($term, $query, $reader)
+ public function __construct(Zend_Search_Lucene_Index_Term $term,
+ Zend_Search_Lucene_Search_Query $query,
+ Zend_Search_Lucene_Interface $reader)
{
$this->_term = $term;
$this->_query = $query;
}
- /**
- * The weight for this query
- *
- * @return float
- */
- public function getValue()
- {
- return $this->_value;
- }
-
-
/**
* The sum of squared weights of contained query clauses.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Storage_Directory
/**
* Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
*
+ * If the $shareHandler option is true, the file handler can be shared between File Object
+ * requests. This improves performance, but causes problems with the file position.
+ * Shared handlers are good for short atomic requests.
+ * Non-shared handlers are useful for stream file reading (especially for compound files).
+ *
* @param string $filename
+ * @param boolean $shareHandler
* @return Zend_Search_Lucene_Storage_File
*/
- abstract public function getFileObject($filename);
+ abstract public function getFileObject($filename, $shareHandler = true);
}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Storage_Directory */
-require_once 'Zend/Search/Lucene/Storage/Directory.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory.php';
/** Zend_Search_Lucene_Storage_File_Filesystem */
-require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Filesystem.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene_Storage_Directory
* @return boolean
*/
- static public function mkdirs($dir, $mode = 0777, $recursive = true)
+ public static function mkdirs($dir, $mode = 0777, $recursive = true)
{
if (is_null($dir) || $dir === '') {
return false;
$fileObject->close();
}
- unset($this->_fileHandlers);
+ $this->_fileHandlers = array();
}
$result = array();
$dirContent = opendir( $this->_dirPath );
- while ($file = readdir($dirContent)) {
+ while (($file = readdir($dirContent)) !== false) {
if (($file == '..')||($file == '.')) continue;
- $fullName = $this->_dirPath . '/' . $file;
-
if( !is_dir($this->_dirPath . '/' . $file) ) {
$result[] = $file;
}
}
+ closedir($dirContent);
return $result;
}
*/
public function deleteFile($filename)
{
+ /**
+ * @todo add support of "deletable" file
+ * "deletable" is used on Windows systems if file can't be deleted
+ * (while it is still open).
+ */
+
if (isset($this->_fileHandlers[$filename])) {
$this->_fileHandlers[$filename]->close();
}
unset($this->_fileHandlers[$filename]);
- unlink($this->_dirPath .'/'. $filename);
+ unlink($this->_dirPath . '/' . $filename);
}
* @param string $from
* @param string $to
* @return void
+ * @throws Zend_Search_Lucene_Exception
*/
public function renameFile($from, $to)
{
- if ($this->_fileHandlers[$from] !== null) {
+ global $php_errormsg;
+
+ if (isset($this->_fileHandlers[$from])) {
$this->_fileHandlers[$from]->close();
}
unset($this->_fileHandlers[$from]);
- if ($this->_fileHandlers[$to] !== null) {
+ if (isset($this->_fileHandlers[$to])) {
$this->_fileHandlers[$to]->close();
}
unset($this->_fileHandlers[$to]);
if (file_exists($this->_dirPath . '/' . $to)) {
- unlink($this->_dirPath . '/' . $to);
+ if (!unlink($this->_dirPath . '/' . $to)) {
+ throw new Zend_Search_Lucene_Exception('Delete operation failed');
+ }
+ }
+
+ $trackErrors = ini_get('track_errors');
+ ini_set('track_errors', '1');
+
+ $success = @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
+ if (!$success) {
+ ini_set('track_errors', $trackErrors);
+ throw new Zend_Search_Lucene_Exception($php_errormsg);
}
- return @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
+ ini_set('track_errors', $trackErrors);
+
+ return $success;
}
/**
* Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
*
+ * If the $shareHandler option is true, the file handler can be shared between File Object
+ * requests. This improves performance, but causes problems with the file position.
+ * Shared handlers are good for short atomic requests.
+ * Non-shared handlers are useful for stream file reading (especially for compound files).
+ *
* @param string $filename
+ * @param boolean $shareHandler
* @return Zend_Search_Lucene_Storage_File
*/
- public function getFileObject($filename)
+ public function getFileObject($filename, $shareHandler = true)
{
+ $fullFilename = $this->_dirPath . '/' . $filename;
+
+ if (!$shareHandler) {
+ return new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
+ }
+
if (isset( $this->_fileHandlers[$filename] )) {
$this->_fileHandlers[$filename]->seek(0);
return $this->_fileHandlers[$filename];
}
- $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename);
+ $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
return $this->_fileHandlers[$filename];
}
}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Storage_File
*/
abstract public function tell();
+ /**
+ * Flush output.
+ *
+ * Returns true on success or false on failure.
+ *
+ * @return boolean
+ */
+ abstract public function flush();
+
/**
* Writes $length number of bytes (all, if $length===null) to the end
* of the file.
*/
abstract protected function _fwrite($data, $length=null);
+ /**
+ * Lock file
+ *
+ * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
+ *
+ * @param integer $lockType
+ * @param boolean $nonBlockinLock
+ * @return boolean
+ */
+ abstract public function lock($lockType, $nonBlockinLock = false);
+
+ /**
+ * Unlock file
+ */
+ abstract public function unlock();
/**
* Reads a byte from the current position in the file
{
return $this->_fread($this->readVInt());
}
-}
\ No newline at end of file
+}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Storage_File */
-require_once 'Zend/Search/Lucene/Storage/File.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File.php';
/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Storage
- * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Storage_File
{
global $php_errormsg;
- $trackErrors = ini_get( "track_errors");
+ $trackErrors = ini_get('track_errors');
ini_set('track_errors', '1');
$this->_fileHandle = @fopen($filename, $mode);
- if ($this->_fileHandle===false) {
+ if ($this->_fileHandle === false) {
ini_set('track_errors', $trackErrors);
throw new Zend_Search_Lucene_Exception($php_errormsg);
}
return ftell($this->_fileHandle);
}
+ /**
+  * Flush output.
+  *
+  * Hands the file handle to fflush() and passes its verdict back unchanged.
+  *
+  * @return boolean true on success, false on failure
+  */
+ public function flush()
+ {
+     $flushed = fflush($this->_fileHandle);
+
+     return $flushed;
+ }
/**
* Close File object
fwrite($this->_fileHandle, $data, $length);
}
}
+
+ /**
+  * Lock file
+  *
+  * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
+  * When $nonBlockinLock is true, LOCK_NB is OR-ed into the operation that
+  * is passed to flock().
+  *
+  * @param integer $lockType
+  * @param boolean $nonBlockinLock
+  * @return boolean result of flock()
+  */
+ public function lock($lockType, $nonBlockinLock = false)
+ {
+     $operation = $nonBlockinLock ? ($lockType | LOCK_NB) : $lockType;
+
+     return flock($this->_fileHandle, $operation);
+ }
+
+ /**
+  * Unlock file
+  *
+  * Releases the lock with flock(LOCK_UN). If the handle is null there is
+  * nothing to release and the call is reported as successful.
+  *
+  * @return boolean true on success
+  */
+ public function unlock()
+ {
+     if ($this->_fileHandle === null) {
+         return true;
+     }
+
+     return flock($this->_fileHandle, LOCK_UN);
+ }
}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Storage_File */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
+{
+ /**
+ * FileData
+ *
+ * Complete contents of the in-memory "file", held as one byte string.
+ * The write methods of this class append to it.
+ *
+ * @var string
+ */
+ private $_data;
+
+ /**
+ * File Position
+ *
+ * Byte offset into $_data. The read methods advance it by the number of
+ * bytes consumed; the write methods reset it to strlen($_data).
+ *
+ * @var integer
+ */
+ private $_position = 0;
+
+
+ /**
+ * Object constructor
+ *
+ * Stores $data as the initial contents of the in-memory file.
+ *
+ * @param string $data
+ */
+ public function __construct($data)
+ {
+ $this->_data = $data;
+ }
+
+ /**
+  * Returns up to $length bytes starting at the current position and moves
+  * the position forward by $length.
+  *
+  * @param integer $length
+  * @return string
+  */
+ protected function _fread($length = 1)
+ {
+     $start = $this->_position;
+     $this->_position = $start + $length;
+
+     return substr($this->_data, $start, $length);
+ }
+
+
+ /**
+  * Sets the file position indicator.
+  * The new position, measured in bytes from the beginning of the file,
+  * is obtained by adding offset to the position specified by whence,
+  * whose values are defined as follows:
+  * SEEK_SET - Set position equal to offset bytes.
+  * SEEK_CUR - Set position to current location plus offset.
+  * SEEK_END - Set position to end-of-file plus offset. (To move to
+  * a position before the end-of-file, you need to pass a negative value
+  * in offset.)
+  * Upon success, returns 0; otherwise (unrecognised $whence), returns -1
+  * and leaves the position untouched.
+  *
+  * @param integer $offset
+  * @param integer $whence
+  * @return integer 0 on success, -1 on failure
+  */
+ public function seek($offset, $whence=SEEK_SET)
+ {
+     switch ($whence) {
+         case SEEK_SET:
+             $this->_position = $offset;
+             break;
+
+         case SEEK_CUR:
+             $this->_position += $offset;
+             break;
+
+         case SEEK_END:
+             $this->_position = strlen($this->_data) + $offset;
+             break;
+
+         default:
+             // Unknown $whence: nothing was moved, so report failure as the
+             // documented contract promises.
+             return -1;
+     }
+
+     return 0;
+ }
+
+ /**
+  * Reports the current byte offset within the in-memory file.
+  *
+  * @return integer
+  */
+ public function tell()
+ {
+     $offset = $this->_position;
+
+     return $offset;
+ }
+
+ /**
+  * Flush output.
+  *
+  * This implementation performs no work and always reports success.
+  *
+  * @return boolean always true
+  */
+ public function flush()
+ {
+     // Intentionally empty: this method only reports success.
+     return true;
+ }
+
+ /**
+  * Appends the first $length bytes of $data (all of it when $length is
+  * null) to the end of the file and moves the position to the new end.
+  *
+  * @param string $data
+  * @param integer $length
+  */
+ protected function _fwrite($data, $length=null)
+ {
+     // Append is the only supported write, so the current position is
+     // deliberately not consulted.
+     $chunk = ($length === null) ? $data : substr($data, 0, $length);
+
+     $this->_data    .= $chunk;
+     $this->_position = strlen($this->_data);
+ }
+
+ /**
+  * Lock file
+  *
+  * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
+  * Both arguments are accepted but ignored: a memory file cannot be shared,
+  * so every lock request is reported as granted.
+  *
+  * @param integer $lockType
+  * @param boolean $nonBlockinLock
+  * @return boolean always true
+  */
+ public function lock($lockType, $nonBlockinLock = false)
+ {
+     // Nothing to lock for a memory file.
+     return true;
+ }
+
+ /**
+  * Unlock file
+  *
+  * Memory files can't be shared, so there is never a lock to release.
+  * Returns true so that callers get the same boolean result that the
+  * filesystem implementation of unlock() gives on success.
+  *
+  * @return boolean always true
+  */
+ public function unlock()
+ {
+     // Memory files can't be shared
+     // do nothing
+
+     return true;
+ }
+
+ /**
+  * Returns the ordinal value of the byte at the current position and
+  * moves the position one byte forward.
+  *
+  * @return integer
+  */
+ public function readByte()
+ {
+     $offset = $this->_position++;
+
+     return ord($this->_data[$offset]);
+ }
+
+ /**
+  * Appends a single byte to the end of the file and moves the position
+  * to the new end.
+  *
+  * @param integer $byte
+  * @return integer always 1
+  */
+ public function writeByte($byte)
+ {
+     // Append-only: the current position is not consulted.
+     $this->_data     = $this->_data . chr($byte);
+     $this->_position = strlen($this->_data);
+
+     return 1;
+ }
+
+ /**
+  * Returns up to $num bytes starting at the current position and moves
+  * the position forward by $num.
+  *
+  * @param integer $num
+  * @return string
+  */
+ public function readBytes($num)
+ {
+     $start = $this->_position;
+     $this->_position = $start + $num;
+
+     return substr($this->_data, $start, $num);
+ }
+
+ /**
+  * Appends the first $num bytes of $data (all of it when $num is null)
+  * to the end of the stored string and moves the position to the new end.
+  *
+  * @param string $data
+  * @param integer $num
+  */
+ public function writeBytes($data, $num=null)
+ {
+     // Append-only: the current position is not consulted.
+     $chunk = ($num === null) ? $data : substr($data, 0, $num);
+
+     $this->_data    .= $chunk;
+     $this->_position = strlen($this->_data);
+ }
+
+
+ /**
+  * Reads an integer from the current position in the file
+  * and advances the file pointer.
+  *
+  * Four bytes are consumed and combined most-significant byte first.
+  *
+  * @return integer
+  */
+ public function readInt()
+ {
+     $str = substr($this->_data, $this->_position, 4);
+     $this->_position += 4;
+
+     // Square-bracket offsets: the curly-brace form ($str{0}) no longer
+     // parses on PHP 8.
+     return ord($str[0]) << 24 |
+            ord($str[1]) << 16 |
+            ord($str[2]) << 8  |
+            ord($str[3]);
+ }
+
+
+ /**
+  * Appends $value to the end of the file as four bytes, most-significant
+  * byte first, and moves the position to the new end.
+  *
+  * @param integer $value
+  */
+ public function writeInt($value)
+ {
+     // Append-only: the current position is not consulted.
+     settype($value, 'integer');
+
+     $packed = '';
+     foreach (array(24, 16, 8, 0) as $shift) {
+         $packed .= chr(($value >> $shift) & 0xFF);
+     }
+
+     $this->_data    .= $packed;
+     $this->_position = strlen($this->_data);
+ }
+
+
+ /**
+  * Returns a long integer from the current position in the file
+  * and advances the file pointer.
+  *
+  * Eight bytes are consumed, most-significant byte first. When PHP
+  * integers are only four bytes wide, the value must fit in 31 bits:
+  * the upper four bytes have to be zero and the top bit of the fifth
+  * byte has to be clear, otherwise an exception is thrown.
+  *
+  * @return integer
+  * @throws Zend_Search_Lucene_Exception
+  */
+ public function readLong()
+ {
+     $str = substr($this->_data, $this->_position, 8);
+     $this->_position += 8;
+
+     /**
+      * Check, that we work in 64-bit mode.
+      * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
+      */
+     if (PHP_INT_SIZE > 4) {
+         return ord($str[0]) << 56 |
+                ord($str[1]) << 48 |
+                ord($str[2]) << 40 |
+                ord($str[3]) << 32 |
+                ord($str[4]) << 24 |
+                ord($str[5]) << 16 |
+                ord($str[6]) << 8  |
+                ord($str[7]);
+     }
+
+     // 32-bit mode. The sign-bit test has to look at byte 4 (the top byte
+     // of the low word); byte 0 is already required to be zero.
+     if ((ord($str[0]) != 0) ||
+         (ord($str[1]) != 0) ||
+         (ord($str[2]) != 0) ||
+         (ord($str[3]) != 0) ||
+         ((ord($str[4]) & 0x80) != 0)) {
+         throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+     }
+
+     return ord($str[4]) << 24 |
+            ord($str[5]) << 16 |
+            ord($str[6]) << 8  |
+            ord($str[7]);
+ }
+
+ /**
+  * Appends $value to the end of the file as eight bytes, most-significant
+  * byte first, and moves the position to the new end.
+  *
+  * When PHP integers are only four bytes wide, values above 0x7FFFFFFF are
+  * rejected and the upper four bytes are written as zeros.
+  *
+  * @param integer $value
+  * @throws Zend_Search_Lucene_Exception
+  */
+ public function writeLong($value)
+ {
+     // Append-only: the current position is not consulted.
+
+     /**
+      * Check, that we work in 64-bit mode.
+      * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
+      */
+     if (PHP_INT_SIZE > 4) {
+         settype($value, 'integer');
+
+         $encoded = '';
+         for ($shift = 56; $shift >= 0; $shift -= 8) {
+             $encoded .= chr($value >> $shift & 0xFF);
+         }
+     } else {
+         if ($value > 0x7FFFFFFF) {
+             throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+         }
+
+         // High word is always zero in 32-bit mode.
+         $encoded = "\x00\x00\x00\x00";
+         for ($shift = 24; $shift >= 0; $shift -= 8) {
+             $encoded .= chr($value >> $shift & 0xFF);
+         }
+     }
+
+     $this->_data    .= $encoded;
+     $this->_position = strlen($this->_data);
+ }
+
+
+
+ /**
+  * Returns a variable-length integer from the current
+  * position in the file and advances the file pointer.
+  *
+  * Each byte contributes its low seven bits, least-significant group
+  * first; a set high bit means another byte follows.
+  *
+  * @return integer
+  */
+ public function readVInt()
+ {
+     $result = 0;
+     $shift  = 0;
+
+     do {
+         $current = ord($this->_data[$this->_position++]);
+         $result |= ($current & 0x7F) << $shift;
+         $shift  += 7;
+     } while (($current & 0x80) != 0);
+
+     return $result;
+ }
+
+ /**
+  * Appends $value to the end of the file as a variable-length integer:
+  * seven bits per byte, least-significant group first, with the high bit
+  * set on every byte except the last. Moves the position to the new end.
+  *
+  * @param integer $value
+  */
+ public function writeVInt($value)
+ {
+     // Append-only: the current position is not consulted.
+     settype($value, 'integer');
+
+     $encoded = '';
+     while ($value > 0x7F) {
+         $encoded .= chr(($value & 0x7F) | 0x80);
+         $value >>= 7;
+     }
+     $encoded .= chr($value);
+
+     $this->_data    .= $encoded;
+     $this->_position = strlen($this->_data);
+ }
+
+
+ /**
+  * Reads a string from the current position in the file
+  * and advances the file pointer.
+  *
+  * The string is stored as a VInt character count followed by the
+  * characters in the Java 2 flavour of UTF-8, in which the null character
+  * is written as the two bytes 0xC0 0x80. That pair is turned back into a
+  * single "\x00" byte in the returned string.
+  *
+  * This implementation supports only Basic Multilingual Plane
+  * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+  * "supplementary characters" (characters whose code points are
+  * greater than 0xFFFF)
+  * Java 2 represents these characters as a pair of char (16-bit)
+  * values, the first from the high-surrogates range (0xD800-0xDBFF),
+  * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+  * they are encoded as usual UTF-8 characters in six bytes.
+  * Standard UTF-8 representation uses four bytes for supplementary
+  * characters.
+  *
+  * @return string
+  */
+ public function readString()
+ {
+     $strlen = $this->readVInt();
+     if ($strlen == 0) {
+         return '';
+     }
+
+     // Start with one byte per character; the continuation bytes of
+     // multi-byte characters are pulled in as their lead bytes are met.
+     $str_val = substr($this->_data, $this->_position, $strlen);
+     $this->_position += $strlen;
+
+     for ($count = 0; $count < $strlen; $count++) {
+         $lead = ord($str_val[$count]);
+         if (($lead & 0xC0) != 0xC0) {
+             // Single-byte character, nothing more to fetch
+             continue;
+         }
+
+         $addBytes = 1;
+         if ($lead & 0x20) {
+             $addBytes++;
+
+             // Never used. Java2 doesn't encode strings in four bytes
+             if ($lead & 0x10) {
+                 $addBytes++;
+             }
+         }
+         $str_val .= substr($this->_data, $this->_position, $addBytes);
+         $this->_position += $addBytes;
+         $strlen += $addBytes;
+
+         // Check for null character. Java2 encodes null character
+         // in two bytes.
+         if ($lead == 0xC0 && ord($str_val[$count + 1]) == 0x80) {
+             // Collapse the pair into a real NUL byte. Assigning the
+             // integer 0 to a string offset would store the character "0".
+             $str_val = substr($str_val, 0, $count)
+                      . "\x00"
+                      . substr($str_val, $count + 2);
+
+             // One byte was removed, so the byte total shrinks and there is
+             // no continuation byte left to skip over.
+             $strlen--;
+         } else {
+             $count += $addBytes;
+         }
+     }
+
+     return $str_val;
+ }
+
+ /**
+  * Writes a string to the end of file.
+  *
+  * The output is a VInt holding the number of characters, followed by the
+  * string bytes with every "\x00" byte replaced by the two-byte sequence
+  * "\xC0\x80" used by the Java 2 string representation. The position is
+  * moved to the new end of the data.
+  *
+  * This implementation supports only Basic Multilingual Plane
+  * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+  * "supplementary characters" (characters whose code points are
+  * greater than 0xFFFF)
+  * Java 2 represents these characters as a pair of char (16-bit)
+  * values, the first from the high-surrogates range (0xD800-0xDBFF),
+  * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+  * they are encoded as usual UTF-8 characters in six bytes.
+  * Standard UTF-8 representation uses four bytes for supplementary
+  * characters.
+  *
+  * @param string $str
+  * @throws Zend_Search_Lucene_Exception if the computed character count is negative
+  */
+ public function writeString($str)
+ {
+     // We do not need to check if file position points to the end of "file".
+     // Only append operation is supported now
+
+     // convert input to a string before iterating string characters
+     settype($str, 'string');
+
+     $chars = $strlen = strlen($str);
+     $containNullChars = false;
+
+     for ($count = 0; $count < $strlen; $count++) {
+         /**
+          * String is already in Java 2 representation.
+          * We should only calculate actual string length and replace
+          * \x00 by \xC0\x80
+          */
+         $byte = ord($str[$count]);
+
+         if (($byte & 0xC0) == 0xC0) {
+             $addBytes = 1;
+             if ($byte & 0x20) {
+                 $addBytes++;
+
+                 // Never used. Java2 doesn't encode strings in four bytes
+                 // and we don't support non-BMP characters
+                 if ($byte & 0x10) {
+                     $addBytes++;
+                 }
+             }
+             // Continuation bytes do not count as characters; skip them.
+             $chars -= $addBytes;
+             $count += $addBytes;
+         } elseif ($byte == 0) {
+             // A lead byte can never be zero, so the NUL test has to sit
+             // outside the multi-byte branch to be reachable.
+             $containNullChars = true;
+         }
+     }
+
+     if ($chars < 0) {
+         throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
+     }
+
+     $this->writeVInt($chars);
+     if ($containNullChars) {
+         // str_replace() takes (search, replace, subject) in that order.
+         $this->_data .= str_replace("\x00", "\xC0\x80", $str);
+     } else {
+         $this->_data .= $str;
+     }
+
+     $this->_position = strlen($this->_data);
+ }
+
+
+ /**
+  * Reads a VInt length prefix, then returns that many bytes from the
+  * position that follows it, advancing the position past them.
+  *
+  * @return string
+  */
+ public function readBinary()
+ {
+     $size  = $this->readVInt();
+     $start = $this->_position;
+     $this->_position = $start + $size;
+
+     return substr($this->_data, $start, $size);
+ }
+}
+