git.mjollnir.org Git - moodle.git/commitdiff
Lucene Zend Implementation update (better handle of UTF8)
author: diml <diml>
Mon, 9 Jul 2007 21:05:40 +0000 (21:05 +0000)
committer: diml <diml>
Mon, 9 Jul 2007 21:05:40 +0000 (21:05 +0000)
31 files changed:
search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query.php
search/Zend/Search/Lucene/Search/Query/Boolean.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query/Empty.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query/MultiTerm.php
search/Zend/Search/Lucene/Search/Query/Phrase.php
search/Zend/Search/Lucene/Search/Query/Term.php
search/Zend/Search/Lucene/Search/QueryEntry.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryEntry/Phrase.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryEntry/Subquery.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryEntry/Term.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryHit.php
search/Zend/Search/Lucene/Search/QueryLexer.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryParser.php
search/Zend/Search/Lucene/Search/QueryParserContext.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryParserException.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryToken.php
search/Zend/Search/Lucene/Search/QueryTokenizer.php
search/Zend/Search/Lucene/Search/Similarity.php
search/Zend/Search/Lucene/Search/Similarity/Default.php
search/Zend/Search/Lucene/Search/Weight.php
search/Zend/Search/Lucene/Search/Weight/Boolean.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight/Empty.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight/MultiTerm.php
search/Zend/Search/Lucene/Search/Weight/Phrase.php
search/Zend/Search/Lucene/Search/Weight/Term.php
search/Zend/Search/Lucene/Storage/Directory.php
search/Zend/Search/Lucene/Storage/Directory/Filesystem.php
search/Zend/Search/Lucene/Storage/File.php
search/Zend/Search/Lucene/Storage/File/Filesystem.php
search/Zend/Search/Lucene/Storage/File/Memory.php [new file with mode: 0644]

diff --git a/search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php b/search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php
new file mode 100644 (file)
index 0000000..1801bc1
--- /dev/null
@@ -0,0 +1,280 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Search_QueryParser */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
+
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
+{
+    /** State Machine states */
+    const ST_START           = 0;
+    const ST_LITERAL         = 1;
+    const ST_NOT_OPERATOR    = 2;
+    const ST_AND_OPERATOR    = 3;
+    const ST_OR_OPERATOR     = 4;
+
+    /** Input symbols */
+    const IN_LITERAL         = 0;
+    const IN_NOT_OPERATOR    = 1;
+    const IN_AND_OPERATOR    = 2;
+    const IN_OR_OPERATOR     = 3;
+
+
+    /**
+     * NOT operator signal: true while the next literal has to be recorded as negated
+     *
+     * @var boolean
+     */
+    private $_negativeLiteral = false;
+
+    /**
+     * Literal most recently handed to processLiteral()
+     *
+     * @var mixed
+     */
+    private $_literal;
+
+
+    /**
+     * Conjunctions which are already closed (by an OR operator or by finishExpression())
+     *
+     * Each conjunction is an array of conjunction elements.
+     * Each conjunction element is a two-element array:
+     * array(<literal>, <sign>)
+     * where <sign> is false if the literal was preceded by a NOT operator
+     * and true otherwise.
+     *
+     * So, it has a structure:
+     * array( array( array(<literal>, <sign>), // first literal of first conjunction
+     *               array(<literal>, <sign>), // second literal of first conjunction
+     *               ...
+     *               array(<literal>, <sign>)
+     *             ), // end of first conjunction
+     *        array( array(<literal>, <sign>), // first literal of second conjunction
+     *               array(<literal>, <sign>), // second literal of second conjunction
+     *               ...
+     *               array(<literal>, <sign>)
+     *             ), // end of second conjunction
+     *        ...
+     *      ) // end of structure
+     *
+     * @var array
+     */
+    private $_conjunctions = array();
+
+    /**
+     * Conjunction which is still being collected
+     *
+     * @var array
+     */
+    private $_currentConjunction = array();
+
+
+    /**
+     * Object constructor
+     *
+     * Registers the states, the input symbols, the allowed transitions and
+     * the actions of the recognizer.
+     */
+    public function __construct()
+    {
+        $states       = array(self::ST_START,
+                              self::ST_LITERAL,
+                              self::ST_NOT_OPERATOR,
+                              self::ST_AND_OPERATOR,
+                              self::ST_OR_OPERATOR);
+        $inputSymbols = array(self::IN_LITERAL,
+                              self::IN_NOT_OPERATOR,
+                              self::IN_AND_OPERATOR,
+                              self::IN_OR_OPERATOR);
+
+        parent::__construct($states, $inputSymbols);
+
+        // Transition actions used when no operator separates two operands.
+        // NOTE(review): presumably the FSM does not run the ST_LITERAL entry action on
+        // a LITERAL -> LITERAL self-transition, which would explain why
+        // emptyOperatorAction() calls literalAction() itself - confirm against
+        // Zend_Search_Lucene_FSM.
+        $implicitOperator    = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
+        $implicitNotOperator = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');
+
+        // Each rule: source state, input symbol, target state [, transition action]
+        $transitions = array(
+            array(self::ST_START,        self::IN_LITERAL,        self::ST_LITERAL),
+            array(self::ST_START,        self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),
+
+            array(self::ST_LITERAL,      self::IN_AND_OPERATOR,   self::ST_AND_OPERATOR),
+            array(self::ST_LITERAL,      self::IN_OR_OPERATOR,    self::ST_OR_OPERATOR),
+            array(self::ST_LITERAL,      self::IN_LITERAL,        self::ST_LITERAL,      $implicitOperator),
+            array(self::ST_LITERAL,      self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR, $implicitNotOperator),
+
+            array(self::ST_NOT_OPERATOR, self::IN_LITERAL,        self::ST_LITERAL),
+
+            array(self::ST_AND_OPERATOR, self::IN_LITERAL,        self::ST_LITERAL),
+            array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),
+
+            array(self::ST_OR_OPERATOR,  self::IN_LITERAL,        self::ST_LITERAL),
+            array(self::ST_OR_OPERATOR,  self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),
+        );
+        $this->addRules($transitions);
+
+        // Entry actions, registered in the order NOT, OR, LITERAL
+        $entryHandlers = array(self::ST_NOT_OPERATOR => 'notOperatorAction',
+                               self::ST_OR_OPERATOR  => 'orOperatorAction',
+                               self::ST_LITERAL      => 'literalAction');
+        foreach ($entryHandlers as $state => $handlerName) {
+            $this->addEntryAction($state, new Zend_Search_Lucene_FSMAction($this, $handlerName));
+        }
+    }
+
+
+    /**
+     * Feed the next operator into the state machine.
+     *
+     * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
+     *
+     * @param integer $operator
+     */
+    public function processOperator($operator)
+    {
+        $this->process($operator);
+    }
+
+    /**
+     * Feed the next expression literal into the state machine.
+     *
+     * The literal is remembered first, so that the actions triggered by the
+     * IN_LITERAL input can pick it up.
+     *
+     * @param mixed $literal
+     */
+    public function processLiteral($literal)
+    {
+        $this->_literal = $literal;
+
+        $this->process(self::IN_LITERAL);
+    }
+
+    /**
+     * Finish an expression and return result
+     *
+     * The conjunction being collected is closed and appended to the result.
+     * Result is a set of boolean query conjunctions.
+     *
+     * Each conjunction is an array of conjunction elements.
+     * Each conjunction element is a two-element array:
+     * array(<literal>, <sign>)
+     * where <sign> is false if the literal was preceded by a NOT operator
+     * and true otherwise.
+     *
+     * So, it has a structure:
+     * array( array( array(<literal>, <sign>), // first literal of first conjunction
+     *               array(<literal>, <sign>), // second literal of first conjunction
+     *               ...
+     *               array(<literal>, <sign>)
+     *             ), // end of first conjunction
+     *        array( array(<literal>, <sign>), // first literal of second conjunction
+     *               array(<literal>, <sign>), // second literal of second conjunction
+     *               ...
+     *               array(<literal>, <sign>)
+     *             ), // end of second conjunction
+     *        ...
+     *      ) // end of structure
+     *
+     * @return array
+     * @throws Zend_Search_Lucene_Exception  if the expression doesn't end with a literal
+     */
+    public function finishExpression()
+    {
+        $endsWithLiteral = ($this->getState() == self::ST_LITERAL);
+        if (!$endsWithLiteral) {
+            throw new Zend_Search_Lucene_Exception('Literal expected.');
+        }
+
+        $this->_conjunctions[] = $this->_currentConjunction;
+
+        return $this->_conjunctions;
+    }
+
+
+
+    /*********************************************************************
+     * Actions implementation
+     *********************************************************************/
+
+    /**
+     * Apply the query parser's default operator in place of an omitted one:
+     * nothing has to be done for AND, any other default closes the current
+     * conjunction exactly like an explicit OR.
+     */
+    private function _applyImplicitOperator()
+    {
+        $defaultOperator = Zend_Search_Lucene_Search_QueryParser::getDefaultOperator();
+
+        if ($defaultOperator != Zend_Search_Lucene_Search_QueryParser::B_AND) {
+            $this->orOperatorAction();
+        }
+    }
+
+    /**
+     * default (omitted) operator processing
+     */
+    public function emptyOperatorAction()
+    {
+        $this->_applyImplicitOperator();
+
+        // Then handle the literal itself
+        $this->literalAction();
+    }
+
+    /**
+     * default (omitted) + NOT operator processing
+     */
+    public function emptyNotOperatorAction()
+    {
+        $this->_applyImplicitOperator();
+
+        // Then handle the NOT operator itself
+        $this->notOperatorAction();
+    }
+
+
+    /**
+     * NOT operator processing
+     * Raises the negation signal for the next literal
+     */
+    public function notOperatorAction()
+    {
+        $this->_negativeLiteral = true;
+    }
+
+    /**
+     * OR operator processing
+     * Close current conjunction and start a new, empty one
+     */
+    public function orOperatorAction()
+    {
+        $closedConjunction         = $this->_currentConjunction;
+        $this->_currentConjunction = array();
+
+        $this->_conjunctions[] = $closedConjunction;
+    }
+
+    /**
+     * Literal processing
+     */
+    public function literalAction()
+    {
+        // Sign is true for a plain literal and false for a negated one
+        $sign = !$this->_negativeLiteral;
+
+        // Add literal to the current conjunction
+        $this->_currentConjunction[] = array($this->_literal, $sign);
+
+        // The negation signal applies to one literal only
+        $this->_negativeLiteral = false;
+    }
+}
index bf284970a1623a5d5fbb9ba63c33a8b121b3aeb7..e55c22ce0133ec5a1f13f6b3446396c54a2b96a8 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
+/** Zend_Search_Lucene_Document_Html */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
+
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 abstract class Zend_Search_Lucene_Search_Query
@@ -35,14 +38,31 @@ abstract class Zend_Search_Lucene_Search_Query
      *
      * @var float
      */
-    private $_boost = 1.0;
+    private $_boost = 1;
 
     /**
      * Query weight
      *
      * @var Zend_Search_Lucene_Search_Weight
      */
-    protected $_weight;
+    protected $_weight = null;
+
+    /**
+     * Current highlight color
+     *
+     * @var integer
+     */
+    private $_currentColorIndex = 0;
+
+    /**
+     * List of colors for text highlighting
+     *
+     * @var array
+     */
+    private $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
+                                      '#ff8888', '#88ff88', '#8888ff',
+                                      '#88dddd', '#dd88dd', '#dddd88',
+                                      '#aaddff', '#aaffdd', '#ddaaff', '#ddffaa', '#ffaadd', '#ffddaa');
 
 
     /**
@@ -71,30 +91,133 @@ abstract class Zend_Search_Lucene_Search_Query
      * Score specified document
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
-    abstract public function score($docId, $reader);
+    abstract public function score($docId, Zend_Search_Lucene_Interface $reader);
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations)
+     *
+     * @return array
+     */
+    abstract public function matchedDocs();
+
+    /**
+     * Execute query in context of index reader
+     * It also initializes necessary internal structures
+     *
+     * Query specific implementation
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    abstract public function execute(Zend_Search_Lucene_Interface $reader);
 
     /**
      * Constructs an appropriate Weight implementation for this query.
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return Zend_Search_Lucene_Search_Weight
      */
-    abstract protected function _createWeight($reader);
+    abstract public function createWeight(Zend_Search_Lucene_Interface $reader);
 
     /**
-     * Constructs an initializes a Weight for a query.
+     * Constructs an initializes a Weight for a _top-level_query_.
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      */
-    protected function _initWeight($reader)
+    protected function _initWeight(Zend_Search_Lucene_Interface $reader)
     {
-        $this->_weight = $this->_createWeight($reader);
+        // Check, that it's a top-level query and query weight is not initialized yet.
+        if ($this->_weight !== null) {
+            return $this->_weight;
+        }
+
+        $this->createWeight($reader);
         $sum = $this->_weight->sumOfSquaredWeights();
         $queryNorm = $reader->getSimilarity()->queryNorm($sum);
         $this->_weight->normalize($queryNorm);
     }
 
-}
\ No newline at end of file
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    abstract public function rewrite(Zend_Search_Lucene_Interface $index);
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    abstract public function optimize(Zend_Search_Lucene_Interface $index);
+
+    /**
+     * Reset query, so it can be reused within other queries or
+     * with other indices
+     *
+     * Drops the stored weight by setting it back to null.
+     */
+    public function reset()
+    {
+        $this->_weight = null;
+    }
+
+
+    /**
+     * Print a query
+     *
+     * @return string
+     */
+    abstract public function __toString();
+
+    /**
+     * Return query terms
+     *
+     * @return array
+     */
+    abstract public function getQueryTerms();
+
+    /**
+     * Get highlight color and shift to next
+     *
+     * $colorIndex is updated in place to the index of the next color and wraps
+     * around at the end of the color list. An index outside the list (including
+     * a negative one) is wrapped into range before it is used.
+     *
+     * @param integer &$colorIndex
+     * @return string
+     */
+    protected function _getHighlightColor(&$colorIndex)
+    {
+        $colorCount = count($this->_highlightColors);
+
+        // PHP's % keeps the sign of the dividend, so add $colorCount and reduce again
+        // to always end up in 0 .. $colorCount-1
+        $colorIndex = (($colorIndex % $colorCount) + $colorCount) % $colorCount;
+
+        $color = $this->_highlightColors[$colorIndex];
+
+        // Shift to the next color
+        $colorIndex = ($colorIndex + 1) % $colorCount;
+
+        return $color;
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    abstract public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex);
+
+    /**
+     * Highlight matches in $inputHTML
+     *
+     * Loads the HTML into a document object, lets highlightMatchesDOM() process it
+     * starting with color index 0 and returns the HTML of the processed document.
+     *
+     * @param string $inputHTML
+     * @return string
+     */
+    public function highlightMatches($inputHTML)
+    {
+        $htmlDocument   = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
+        $nextColorIndex = 0;
+
+        // The color index is passed by reference
+        $this->highlightMatchesDOM($htmlDocument, $nextColorIndex);
+
+        return $htmlDocument->getHTML();
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Query/Boolean.php b/search/Zend/Search/Lucene/Search/Query/Boolean.php
new file mode 100644 (file)
index 0000000..710a41f
--- /dev/null
@@ -0,0 +1,715 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Boolean.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
+{
+
+    /**
+     * Subqueries
+     * Array of Zend_Search_Lucene_Query
+     *
+     * @var array
+     */
+    private $_subqueries = array();
+
+    /**
+     * Subqueries signs.
+     * If true then subquery is required.
+     * If false then subquery is prohibited.
+     * If null then subquery is neither prohibited, nor required
+     *
+     * If array is null then all subqueries are required
+     *
+     * @var array
+     */
+    private $_signs = array();
+
+    /**
+     * Result vector.
+     *
+     * @var array
+     */
+    private $_resVector = null;
+
+    /**
+     * A score factor based on the fraction of all query subqueries
+     * that a document contains.
+     * float for conjunction queries
+     * array of float for non conjunction queries
+     *
+     * @var mixed
+     */
+    private $_coord = null;
+
+
+    /**
+     * Class constructor.  Create a new Boolean query object.
+     *
+     * Nothing is initialized unless $subqueries is an array.
+     *
+     * If $signs is omitted, or every sign in it is true, all subqueries are
+     * treated as required and the signs list is stored as null.
+     * It differs from addSubquery() behavior, but should never be used
+     *
+     * @param array $subqueries    Array of Zend_Search_Lucene_Search_Query objects
+     * @param array $signs    Array of signs.  Sign is boolean|null.
+     * @return void
+     */
+    public function __construct($subqueries = null, $signs = null)
+    {
+        if (!is_array($subqueries)) {
+            return;
+        }
+
+        $this->_subqueries = $subqueries;
+
+        // null means "all subqueries are required"
+        $this->_signs = null;
+
+        if (!is_array($signs)) {
+            return;
+        }
+
+        // Keep the signs list only if at least one subquery is not required
+        foreach ($signs as $candidateSign) {
+            if ($candidateSign !== true) {
+                $this->_signs = $signs;
+                return;
+            }
+        }
+    }
+
+
+    /**
+     * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
+     *
+     * The sign is specified as:
+     *     TRUE  - subquery is required
+     *     FALSE - subquery is prohibited
+     *     NULL  - subquery is neither prohibited, nor required
+     *
+     * While the signs list is null (all subqueries required) a required subquery
+     * is added without touching it. The first other sign turns the list into an
+     * explicit array.
+     *
+     * @param  Zend_Search_Lucene_Search_Query $subquery
+     * @param  boolean|null $sign
+     * @return void
+     */
+    public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
+        $allRequiredSoFar = ($this->_signs === null);
+
+        // Nothing to record if this subquery is required and all previous ones are too
+        if (!($sign === true  &&  $allRequiredSoFar)) {
+            if ($allRequiredSoFar) {
+                // Write down the implicit 'required' sign of every previous subquery
+                for ($remaining = count($this->_subqueries); $remaining > 0; $remaining--) {
+                    $this->_signs[] = true;
+                }
+            }
+
+            $this->_signs[] = $sign;
+        }
+
+        $this->_subqueries[] = $subquery;
+    }
+
+    /**
+     * Re-write queries into primitive queries
+     *
+     * Builds a new Boolean query with the same boost whose subqueries are the
+     * rewritten subqueries of this one, each added with its original sign.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
+        $rewritten->setBoost($this->getBoost());
+
+        foreach ($this->_subqueries as $position => $child) {
+            $rewrittenChild = $child->rewrite($index);
+
+            if ($this->_signs === null) {
+                // No signs list: every subquery is required
+                $childSign = true;
+            } else {
+                $childSign = $this->_signs[$position];
+            }
+
+            $rewritten->addSubquery($rewrittenChild, $childSign);
+        }
+
+        return $rewritten;
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * Steps, in order:
+     *  - optimize every subquery;
+     *  - return an Empty query if a required subquery is empty; drop empty
+     *    optional/prohibited subqueries;
+     *  - return an Empty query if all remaining subqueries are prohibited;
+     *  - if exactly one subquery remains, return it (cloned and re-boosted when
+     *    this query's boost is not 1);
+     *  - otherwise try to decompose Term and MultiTerm subqueries into plain terms
+     *    and regroup them into Term/MultiTerm clauses; if the regrouping can't
+     *    consume all terms, the Boolean query built before decomposition is returned.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        $subqueries = array();
+        $signs      = array();
+
+        // Optimize all subqueries
+        // (a null signs list means that every subquery is required)
+        foreach ($this->_subqueries as $id => $subquery) {
+            $subqueries[] = $subquery->optimize($index);
+            $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
+        }
+
+        // Check for empty subqueries
+        foreach ($subqueries as $id => $subquery) {
+            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
+                if ($signs[$id] === true) {
+                    // Matching is required, but is actually empty
+                    return new Zend_Search_Lucene_Search_Query_Empty();
+                } else {
+                    // Matching is optional or prohibited, but is empty
+                    // Remove it from subqueries and signs list
+                    unset($subqueries[$id]);
+                    unset($signs[$id]);
+                }
+            }
+        }
+
+
+        // Check if all non-empty subqueries are prohibited
+        // (an empty $signs list also leaves $allProhibited set to true)
+        $allProhibited = true;
+        foreach ($signs as $sign) {
+            if ($sign !== false) {
+                $allProhibited = false;
+                break;
+            }
+        }
+        if ($allProhibited) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+
+        // Check, if reduced subqueries list has only one entry
+        if (count($subqueries) == 1) {
+            // It's a query with only one required or optional clause
+            // (it's already checked, that it's not a prohibited clause)
+
+            if ($this->getBoost() == 1) {
+                return reset($subqueries);
+            }
+
+            // Clone, so the boost of the original subquery object stays untouched
+            $optimizedQuery = clone reset($subqueries);
+            $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
+
+            return $optimizedQuery;
+        }
+
+
+        // Check, if reduced subqueries list is empty
+        // NOTE(review): looks unreachable - $signs and $subqueries are reduced together,
+        // so an empty list has already returned through the $allProhibited check above.
+        if (count($subqueries) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+
+        // Prepare first candidate for optimized query
+        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
+        $optimizedQuery->setBoost($this->getBoost());
+
+
+        // Parallel lists describing the terms taken out of decomposed subqueries
+        $terms        = array();
+        $tsigns       = array();
+        $boostFactors = array();
+
+        // Try to decompose term and multi-term subqueries
+        foreach ($subqueries as $id => $subquery) {
+            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
+                $terms[]        = $subquery->getTerm();
+                $tsigns[]       = $signs[$id];
+                $boostFactors[] = $subquery->getBoost();
+
+                // remove subquery from a subqueries list
+                unset($subqueries[$id]);
+                unset($signs[$id]);
+           } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
+                $subTerms = $subquery->getTerms();
+                $subSigns = $subquery->getSigns();
+
+                if ($signs[$id] === true) {
+                    // It's a required multi-term subquery.
+                    // Something like '... +(+term1 -term2 term3 ...) ...'
+
+                    // Multi-term required subquery can be decomposed only if it contains
+                    // required terms and doesn't contain prohibited terms:
+                    // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
+                    //
+                    // Check this
+                    $hasRequired   = false;
+                    $hasProhibited = false;
+                    if ($subSigns === null) {
+                        // All subterms are required
+                        $hasRequired = true;
+                    } else {
+                        foreach ($subSigns as $sign) {
+                            if ($sign === true) {
+                                $hasRequired   = true;
+                            } else if ($sign === false) {
+                                $hasProhibited = true;
+                                break;
+                            }
+                        }
+                    }
+                    // Continue if subquery has prohibited terms or doesn't have required terms
+                    // (the subquery then stays in $subqueries as is)
+                    if ($hasProhibited  ||  !$hasRequired) {
+                        continue;
+                    }
+
+                    foreach ($subTerms as $termId => $term) {
+                        $terms[]        = $term;
+                        $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
+                        $boostFactors[] = $subquery->getBoost();
+                    }
+
+                    // remove subquery from a subqueries list
+                    unset($subqueries[$id]);
+                    unset($signs[$id]);
+
+                } else { // $signs[$id] === null  ||  $signs[$id] === false
+                    // It's an optional or prohibited multi-term subquery.
+                    // Something like '... (+term1 -term2 term3 ...) ...'
+                    // or
+                    // something like '... -(+term1 -term2 term3 ...) ...'
+
+                    // Multi-term optional and prohibited subqueries can be decomposed
+                    // only if all terms are optional.
+                    //
+                    // Check if all terms are optional.
+                    $onlyOptional = true;
+                    if ($subSigns === null) {
+                        // All subterms are required
+                        $onlyOptional = false;
+                    } else {
+                        foreach ($subSigns as $sign) {
+                            if ($sign !== null) {
+                                $onlyOptional = false;
+                                break;
+                            }
+                        }
+                    }
+
+                    // Continue if non-optional terms are presented in this multi-term subquery
+                    if (!$onlyOptional) {
+                        continue;
+                    }
+
+                    // Every term inherits the sign of the subquery it is taken from
+                    foreach ($subTerms as $termId => $term) {
+                        $terms[]  = $term;
+                        $tsigns[] = ($signs[$id] === null)? null  /* optional */ :
+                                                            false /* prohibited */;
+                        $boostFactors[] = $subquery->getBoost();
+                    }
+
+                    // remove subquery from a subqueries list
+                    unset($subqueries[$id]);
+                    unset($signs[$id]);
+                }
+            }
+        }
+
+
+        // Check, if there are no decomposed subqueries
+        if (count($terms) == 0 ) {
+            // return prepared candidate
+            return $optimizedQuery;
+        }
+
+
+        // Check, if all subqueries have been decomposed and all terms have the same boost factor
+        if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
+            $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
+            $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
+
+            return $optimizedQuery;
+        }
+
+
+        // This boolean query can't be transformed to Term/MultiTerm query and still contains
+        // several subqueries
+
+        // Separate prohibited terms
+        // ($terms, $tsigns and $boostFactors keep their original keys after unset())
+        $prohibitedTerms        = array();
+        foreach ($terms as $id => $term) {
+            if ($tsigns[$id] === false) {
+                $prohibitedTerms[]        = $term;
+
+                unset($terms[$id]);
+                unset($tsigns[$id]);
+                unset($boostFactors[$id]);
+            }
+        }
+
+        // Regroup the remaining (required/optional) terms into a single clause if possible
+        if (count($terms) == 1) {
+            $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
+            $clause->setBoost(reset($boostFactors));
+
+            $subqueries[] = $clause;
+            $signs[]      = reset($tsigns);
+
+            // Clear terms list
+            $terms = array();
+        } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
+            $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
+            $clause->setBoost(reset($boostFactors));
+
+            $subqueries[] = $clause;
+            // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
+            $signs[]      = (in_array(true, $tsigns))? true : null;
+
+            // Clear terms list
+            $terms = array();
+        }
+
+        // Regroup the prohibited terms into a single prohibited clause
+        if (count($prohibitedTerms) == 1) {
+            // (boost factors are not significant for prohibited clauses)
+            $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
+            $signs[]      = false;
+
+            // Clear prohibited terms list
+            $prohibitedTerms = array();
+        } else if (count($prohibitedTerms) > 1) {
+            // prepare signs array
+            $prohibitedSigns = array();
+            foreach ($prohibitedTerms as $id => $term) {
+                // all prohibited term are grouped as optional into multi-term query
+                $prohibitedSigns[$id] = null;
+            }
+
+            // (boost factors are not significant for prohibited clauses)
+            $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
+            // Clause sign is 'prohibited'
+            $signs[]      = false;
+
+            // Clear terms list
+            $prohibitedTerms = array();
+        }
+
+        /** @todo Group terms with the same boost factors together */
+
+        // Check, that all terms are processed
+        // Replace candidate for optimized query
+        // (otherwise the candidate built before decomposition is returned unchanged)
+        if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
+            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
+            $optimizedQuery->setBoost($this->getBoost());
+        }
+
+        return $optimizedQuery;
+    }
+
+    /**
+     * Returns the subqueries of this boolean query
+     *
+     * The array is returned exactly as it is stored in $this->_subqueries.
+     *
+     * @return array
+     */
+    public function getSubqueries()
+    {
+        return $this->_subqueries;
+    }
+
+
+    /**
+     * Returns the subquery signs
+     *
+     * Elsewhere in this class a sign of true is treated as 'required', false as
+     * 'prohibited' and any other value as 'optional'; a null signs list (instead
+     * of an array) is treated as 'all subqueries are required'.
+     *
+     * @return array|null
+     */
+    public function getSigns()
+    {
+        return $this->_signs;
+    }
+
+
+    /**
+     * Builds the Weight object for this boolean query, stores it in
+     * $this->_weight and returns it.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return Zend_Search_Lucene_Search_Weight
+     */
+    public function createWeight(Zend_Search_Lucene_Interface $reader)
+    {
+        return $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
+    }
+
+
+    /**
+     * Build the result vector for a conjunction query, i.e. one where every
+     * subquery is required
+     * (like '<subquery1> AND <subquery2> AND <subquery3>')
+     *
+     * The result is the key-wise intersection of the matched docs of all
+     * subqueries, sorted by document id.
+     */
+    private function _calculateConjunctionResult()
+    {
+        // No subqueries at all => empty result; otherwise null marks "nothing collected yet"
+        $matched = (count($this->_subqueries) == 0) ? array() : null;
+
+        foreach ($this->_subqueries as $subquery) {
+            $docs    = $subquery->matchedDocs();
+            $matched = ($matched === null) ? $docs : array_intersect_key($matched, $docs);
+
+            if (count($matched) == 0) {
+                // Intersection is already empty, remaining subqueries can't change that
+                break;
+            }
+        }
+
+        ksort($matched, SORT_NUMERIC);
+
+        $this->_resVector = $matched;
+    }
+
+
+    /**
+     * Build the result vector for a query where not every subquery is required
+     * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
+     *
+     * If at least one subquery is required, the result is the intersection of the
+     * docs matched by the required subqueries; otherwise it is the union of the
+     * docs matched by the optional ones. Prohibited subqueries are not applied
+     * here; _nonConjunctionScore() returns 0 for documents they match.
+     */
+    private function _calculateNonConjunctionResult()
+    {
+        $requiredDocs = null;
+        $optionalDocs = array();
+
+        foreach ($this->_subqueries as $subqueryId => $subquery) {
+            $docs = $subquery->matchedDocs();
+            $sign = $this->_signs[$subqueryId];
+
+            if ($sign === false) {
+                // Prohibited subquery.
+                // Nothing to do here: matchedDocs() may include non-matching id's
+                continue;
+            }
+
+            if ($sign === true) {
+                // Required subquery: intersect with what has been collected so far
+                $requiredDocs = ($requiredDocs === null)
+                              ? $docs
+                              : array_intersect_key($requiredDocs, $docs);
+            } else {
+                // Optional subquery: add its docs to the union
+                $optionalDocs += $docs;
+            }
+        }
+
+        if ($requiredDocs === null) {
+            $this->_resVector = &$optionalDocs;
+        } else {
+            $this->_resVector = &$requiredDocs;
+        }
+
+        ksort($this->_resVector, SORT_NUMERIC);
+    }
+
+
+    /**
+     * Score calculator for conjunction queries (all subqueries are required)
+     *
+     * The score is the sum of the subquery scores multiplied by the coordination
+     * factor and by this query's boost. A document that scores 0 in any subquery
+     * scores 0 overall.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return float
+     */
+    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
+    {
+        if ($this->_coord === null) {
+            // In a conjunction every subquery matches, so overlap == maxOverlap
+            $subqueryCount = count($this->_subqueries);
+            $this->_coord  = $reader->getSimilarity()->coord($subqueryCount, $subqueryCount);
+        }
+
+        $score = 0;
+
+        foreach ($this->_subqueries as $subquery) {
+            $subscore = $subquery->score($docId, $reader);
+
+            if ($subscore == 0) {
+                // A required subquery doesn't match this document
+                return 0;
+            }
+
+            // Reuse the value computed above instead of scoring the subquery a second
+            // time. The coordination factor is applied once, on return, in the same
+            // way as _nonConjunctionScore() does it.
+            $score += $subscore;
+        }
+
+        return $score * $this->_coord * $this->getBoost();
+    }
+
+
+    /**
+     * Score calculator for non conjunction queries (not all subqueries are required)
+     *
+     * Returns 0 if a prohibited subquery matches the document or a required one
+     * doesn't. Otherwise the scores of all matching subqueries are summed and
+     * multiplied by the coordination factor for the number of matched subqueries
+     * and by this query's boost.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return float
+     */
+    public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
+    {
+        if ($this->_coord === null) {
+            // Lazily build the table of coordination factors: one entry for each
+            // possible number of matched (non-prohibited) subqueries
+            $this->_coord = array();
+
+            $maxCoord = 0;
+            foreach ($this->_signs as $sign) {
+                if ($sign === false) {
+                    // prohibited subqueries don't take part in coordination
+                    continue;
+                }
+                $maxCoord++;
+            }
+
+            for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
+                $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
+            }
+        }
+
+        $score             = 0;
+        $matchedSubqueries = 0;
+
+        foreach ($this->_subqueries as $subqueryId => $subquery) {
+            $subscore = $subquery->score($docId, $reader);
+            $sign     = $this->_signs[$subqueryId];
+
+            if ($subscore != 0) {
+                if ($sign === false) {
+                    // prohibited subquery matches the document
+                    return 0;
+                }
+
+                $matchedSubqueries++;
+                $score += $subscore;
+            } elseif ($sign === true) {
+                // subquery is required, but doesn't match
+                return 0;
+            }
+        }
+
+        return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
+    }
+
+    /**
+     * Execute query in context of index reader
+     *
+     * Initializes the weight, executes every subquery and then calculates the
+     * result vector of this query from the subquery results.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    public function execute(Zend_Search_Lucene_Interface $reader)
+    {
+        // Make sure the weight is initialized before anything else
+        $this->_initWeight($reader);
+
+        foreach ($this->_subqueries as $clause) {
+            $clause->execute($reader);
+        }
+
+        // A null signs list means that all subqueries are required
+        if ($this->_signs !== null) {
+            $this->_calculateNonConjunctionResult();
+        } else {
+            $this->_calculateConjunctionResult();
+        }
+    }
+
+
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations).
+     * The value is the result vector calculated by execute(); for queries with
+     * prohibited subqueries it may still contain ids which score() rejects.
+     *
+     * @return array
+     */
+    public function matchedDocs()
+    {
+        return $this->_resVector;
+    }
+
+    /**
+     * Score specified document
+     *
+     * Documents which are not in the result vector score 0. All others are
+     * passed to the conjunction or non-conjunction score calculator.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return float
+     */
+    public function score($docId, Zend_Search_Lucene_Interface $reader)
+    {
+        // Documents outside of the result vector never score
+        if (!isset($this->_resVector[$docId])) {
+            return 0;
+        }
+
+        // A null signs list means that all subqueries are required
+        return ($this->_signs === null)
+             ? $this->_conjunctionScore($docId, $reader)
+             : $this->_nonConjunctionScore($docId, $reader);
+    }
+
+    /**
+     * Return query terms
+     *
+     * Collects the terms of all subqueries except the prohibited ones.
+     *
+     * @return array
+     */
+    public function getQueryTerms()
+    {
+        $collected = array();
+
+        foreach ($this->_subqueries as $id => $subquery) {
+            if ($this->_signs !== null  &&  $this->_signs[$id] === false) {
+                // prohibited subquery, its terms are not reported
+                continue;
+            }
+
+            $collected = array_merge($collected, $subquery->getQueryTerms());
+        }
+
+        return $collected;
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * Delegates highlighting to every subquery except the prohibited ones.
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        foreach ($this->_subqueries as $id => $subquery) {
+            if ($this->_signs !== null  &&  $this->_signs[$id] === false) {
+                // prohibited subquery, nothing to highlight
+                continue;
+            }
+
+            $subquery->highlightMatchesDOM($doc, $colorIndex);
+        }
+    }
+
+    /**
+     * Print a query
+     *
+     * Each subquery is printed as '(<subquery>)', prefixed with '+' if it is
+     * required or '-' if it is prohibited, and followed by '^<boost>' if its
+     * boost differs from 1. Subqueries are separated by a single space.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+
+        $parts = array();
+
+        foreach ($this->_subqueries as $id => $subquery) {
+            $part = '';
+
+            if ($this->_signs === null || $this->_signs[$id] === true) {
+                $part .= '+';
+            } else if ($this->_signs[$id] === false) {
+                $part .= '-';
+            }
+
+            $part .= '(' . $subquery->__toString() . ')';
+
+            if ($subquery->getBoost() != 1) {
+                $part .= '^' . $subquery->getBoost();
+            }
+
+            $parts[] = $part;
+        }
+
+        // Join by position rather than testing the array key against 0: the keys of
+        // the subqueries array are not guaranteed to start at 0, and a key based test
+        // would then produce a leading space.
+        return implode(' ', $parts);
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Query/Empty.php b/search/Zend/Search/Lucene/Search/Query/Empty.php
new file mode 100644 (file)
index 0000000..738e332
--- /dev/null
@@ -0,0 +1,139 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_Empty */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Empty.php';
+
+
+/**
+ * Query which matches nothing.
+ *
+ * matchedDocs() and getQueryTerms() return empty arrays, score() returns 0,
+ * execute() and highlightMatchesDOM() do nothing.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Empty extends Zend_Search_Lucene_Search_Query
+{
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * The query is returned unchanged.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        return $this;
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * The query is returned unchanged.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        // "Empty" query is a primitive query and doesn't need to be optimized
+        return $this;
+    }
+
+    /**
+     * Constructs an appropriate Weight implementation for this query.
+     *
+     * A new Zend_Search_Lucene_Search_Weight_Empty object is created on each call.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return Zend_Search_Lucene_Search_Weight
+     */
+    public function createWeight(Zend_Search_Lucene_Interface $reader)
+    {
+        return new Zend_Search_Lucene_Search_Weight_Empty();
+    }
+
+    /**
+     * Execute query in context of index reader
+     * It also initializes necessary internal structures
+     *
+     * There is nothing to execute or to initialize for an empty query.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    public function execute(Zend_Search_Lucene_Interface $reader)
+    {
+        // Do nothing
+    }
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations)
+     * For an empty query it's always an empty array.
+     *
+     * @return array
+     */
+    public function matchedDocs()
+    {
+        return array();
+    }
+
+    /**
+     * Score specified document
+     *
+     * Always 0, whatever document is specified.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return float
+     */
+    public function score($docId, Zend_Search_Lucene_Interface $reader)
+    {
+        return 0;
+    }
+
+    /**
+     * Return query terms
+     *
+     * Always an empty array.
+     *
+     * @return array
+     */
+    public function getQueryTerms()
+    {
+        return array();
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * There are no terms to highlight, so the document is left untouched.
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        // Do nothing
+    }
+
+    /**
+     * Print a query
+     *
+     * @return string  always the '<EmptyQuery>' placeholder
+     */
+    public function __toString()
+    {
+        return '<EmptyQuery>';
+    }
+}
+
index d3ec761bc68c33a4e7c8351ce5fc1b19da939beb..9258279a0037bfeaf16fde5156cf0b9330e549e5 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Query */
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
 
 /** Zend_Search_Lucene_Search_Weight_MultiTerm */
-require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
@@ -55,27 +55,24 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      *
      * @var array
      */
-
-    private $_signs = array();
+    private $_signs;
 
     /**
      * Result vector.
-     * Bitset or array of document IDs
-     * (depending from Bitset extension availability).
      *
-     * @var mixed
+     * @var array
      */
     private $_resVector = null;
 
     /**
      * Terms positions vectors.
      * Array of Arrays:
-     * term1Id => (docId => array( pos1, pos2, ... ), ...)
-     * term2Id => (docId => array( pos1, pos2, ... ), ...)
+     * term1Id => (docId => freq, ...)
+     * term2Id => (docId => freq, ...)
      *
      * @var array
      */
-    private $_termsPositions = array();
+    private $_termsFreqs = array();
 
 
     /**
@@ -101,15 +98,15 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
     /**
      * Class constructor.  Create a new multi-term query object.
      *
+     * if $signs array is omitted then all terms are required
+     * it differs from addTerm() behavior, but should never be used
+     *
      * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
      * @param array $signs    Array of signs.  Sign is boolean|null.
      * @return void
      */
     public function __construct($terms = null, $signs = null)
     {
-        /**
-         * @todo Check contents of $terms and $signs before adding them.
-         */
         if (is_array($terms)) {
             $this->_terms = $terms;
 
@@ -119,7 +116,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
                 foreach ($signs as $sign ) {
                     if ($sign !== true) {
                         $this->_signs = $signs;
-                        continue;
+                        break;
                     }
                 }
             }
@@ -139,25 +136,122 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      * @param  boolean|null $sign
      * @return void
      */
-    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign=null) {
+    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
+        if ($sign !== true || $this->_signs !== null) {       // Skip, if all terms are required
+            if ($this->_signs === null) {                     // Check, If all previous terms are required
+                foreach ($this->_terms as $prevTerm) {
+                    $this->_signs[] = true;
+                }
+            }
+            $this->_signs[] = $sign;
+        }
+
         $this->_terms[] = $term;
+    }
 
-        /**
-         * @todo This is not good.  Sometimes $this->_signs is an array, sometimes
-         * it is null, even when there are terms.  It will be changed so that
-         * it is always an array.
-         */
-        if ($this->_signs === null) {
-            if ($sign !== null) {
-                $this->_signs = array();
-                foreach ($this->_terms as $term) {
-                    $this->_signs[] = null;
+
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * A query without terms is replaced by an Empty query. A query whose terms all
+     * have a field is returned as is. Otherwise the query is turned into a Boolean
+     * query with one rewritten Term subquery per term.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        if (count($this->_terms) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        // Look for a term which is not qualified with a field
+        $hasUnqualifiedTerm = false;
+        foreach ($this->_terms as $term) {
+            if ($term->field === null) {
+                $hasUnqualifiedTerm = true;
+                break;
+            }
+        }
+
+        if (!$hasUnqualifiedTerm) {
+            // All fields are qualified, nothing to rewrite
+            return $this;
+        }
+
+        /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
+        $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
+        $booleanQuery->setBoost($this->getBoost());
+
+        foreach ($this->_terms as $termId => $term) {
+            $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
+            $rewritten = $termQuery->rewrite($index);
+
+            // A null signs list means that all terms are required
+            $sign = ($this->_signs === null) ? true : $this->_signs[$termId];
+
+            $booleanQuery->addSubquery($rewritten, $sign);
+        }
+
+        return $booleanQuery;
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        $terms = $this->_terms;
+        $signs = $this->_signs;
+
+        foreach ($terms as $id => $term) {
+            if (!$index->hasTerm($term)) {
+                if ($signs === null  ||  $signs[$id] === true) {
+                    // Term is required
+                    return new Zend_Search_Lucene_Search_Query_Empty();
+                } else {
+                    // Term is optional or prohibited
+                    // Remove it from terms and signs list
+                    unset($terms[$id]);
+                    unset($signs[$id]);
                 }
-                $this->_signs[] = $sign;
             }
+        }
+
+        // Check if all presented terms are prohibited
+        $allProhibited = true;
+        if ($signs === null) {
+            $allProhibited = false;
         } else {
-            $this->_signs[] = $sign;
+            foreach ($signs as $sign) {
+                if ($sign !== false) {
+                    $allProhibited = false;
+                    break;
+                }
+            }
+        }
+        if ($allProhibited) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        /**
+         * @todo make an optimization for repeated terms
+         * (they may have different signs)
+         */
+
+        if (count($terms) == 1) {
+            // It's already checked, that it's not a prohibited term
+
+            // It's one term query with one required or optional element
+            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
+            $optimizedQuery->setBoost($this->getBoost());
+
+            return $optimizedQuery;
         }
+
+        if (count($terms) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
+        $optimizedQuery->setBoost($this->getBoost());
+        return $optimizedQuery;
     }
 
 
@@ -198,12 +292,13 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
     /**
      * Constructs an appropriate Weight implementation for this query.
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return Zend_Search_Lucene_Search_Weight
      */
-    protected function _createWeight($reader)
+    public function createWeight(Zend_Search_Lucene_Interface $reader)
     {
-        return new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
+        $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
+        return $this->_weight;
     }
 
 
@@ -211,38 +306,32 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      * Calculate result vector for Conjunction query
      * (like '+something +another')
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      */
-    private function _calculateConjunctionResult($reader)
+    private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
     {
-        if (extension_loaded('bitset')) {
-            foreach( $this->_terms as $termId=>$term ) {
-                if($this->_resVector === null) {
-                    $this->_resVector = bitset_from_array($reader->termDocs($term));
-                } else {
-                    $this->_resVector = bitset_intersection(
-                                $this->_resVector,
-                                bitset_from_array($reader->termDocs($term)) );
-                }
+        $this->_resVector = null;
 
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
+        if (count($this->_terms) == 0) {
+            $this->_resVector = array();
+        }
+
+        foreach( $this->_terms as $termId=>$term ) {
+            if($this->_resVector === null) {
+                $this->_resVector = array_flip($reader->termDocs($term));
+            } else {
+                $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
             }
-        } else {
-            foreach( $this->_terms as $termId=>$term ) {
-                if($this->_resVector === null) {
-                    $this->_resVector = array_flip($reader->termDocs($term));
-                } else {
-                    $termDocs = array_flip($reader->termDocs($term));
-                    foreach($this->_resVector as $key=>$value) {
-                        if (!isset( $termDocs[$key] )) {
-                            unset( $this->_resVector[$key] );
-                        }
-                    }
-                }
 
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
+            if (count($this->_resVector) == 0) {
+                // Empty result set, we don't need to check other terms
+                break;
             }
+
+            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
         }
+
+        ksort($this->_resVector, SORT_NUMERIC);
     }
 
 
@@ -250,89 +339,49 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      * Calculate result vector for non Conjunction query
      * (like '+something -another')
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      */
-    private function _calculateNonConjunctionResult($reader)
+    private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
     {
-        if (extension_loaded('bitset')) {
-            $required   = null;
-            $neither    = bitset_empty();
-            $prohibited = bitset_empty();
-
-            foreach ($this->_terms as $termId => $term) {
-                $termDocs = bitset_from_array($reader->termDocs($term));
-
-                if ($this->_signs[$termId] === true) {
-                    // required
-                    if ($required !== null) {
-                        $required = bitset_intersection($required, $termDocs);
-                    } else {
-                        $required = $termDocs;
-                    }
-                } elseif ($this->_signs[$termId] === false) {
-                    // prohibited
-                    $prohibited = bitset_union($prohibited, $termDocs);
+        $required   = null;
+        $optional   = array();
+        $prohibited = array();
+
+        foreach ($this->_terms as $termId => $term) {
+            $termDocs = array_flip($reader->termDocs($term));
+
+            if ($this->_signs[$termId] === true) {
+                // required
+                if ($required !== null) {
+                    // array intersection
+                    $required = array_intersect_key($required, $termDocs);
                 } else {
-                    // neither required, nor prohibited
-                    $neither = bitset_union($neither, $termDocs);
+                    $required = $termDocs;
                 }
-
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
-            }
-
-            if ($required === null) {
-                $required = $neither;
-            }
-            $this->_resVector = bitset_intersection( $required,
-                                                     bitset_invert($prohibited, $reader->count()) );
-        } else {
-            $required   = null;
-            $neither    = array();
-            $prohibited = array();
-
-            foreach ($this->_terms as $termId => $term) {
-                $termDocs = array_flip($reader->termDocs($term));
-
-                if ($this->_signs[$termId] === true) {
-                    // required
-                    if ($required !== null) {
-                        // substitute for bitset_intersection
-                        foreach ($required as $key => $value) {
-                            if (!isset( $termDocs[$key] )) {
-                                unset($required[$key]);
-                            }
-                        }
-                    } else {
-                        $required = $termDocs;
-                    }
-                } elseif ($this->_signs[$termId] === false) {
-                    // prohibited
-                    // substitute for bitset_union
-                    foreach ($termDocs as $key => $value) {
-                        $prohibited[$key] = $value;
-                    }
-                } else {
-                    // neither required, nor prohibited
-                    // substitute for bitset_union
-                    foreach ($termDocs as $key => $value) {
-                        $neither[$key] = $value;
-                    }
-                }
-
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
+            } elseif ($this->_signs[$termId] === false) {
+                // prohibited
+                // array union
+                $prohibited += $termDocs;
+            } else {
+                // neither required, nor prohibited
+                // array union
+                $optional += $termDocs;
             }
 
-            if ($required === null) {
-                $required = $neither;
-            }
+            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
+        }
 
-            foreach ($required as $key=>$value) {
-                if (isset( $prohibited[$key] )) {
-                    unset($required[$key]);
-                }
-            }
-            $this->_resVector = $required;
+        if ($required !== null) {
+            $this->_resVector = (count($prohibited) > 0) ?
+                                           array_diff_key($required, $prohibited) :
+                                           $required;
+        } else {
+            $this->_resVector = (count($prohibited) > 0) ?
+                                           array_diff_key($optional, $prohibited) :
+                                           $optional;
         }
+
+        ksort($this->_resVector, SORT_NUMERIC);
     }
 
 
@@ -340,10 +389,10 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      * Score calculator for conjunction queries (all terms are required)
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
-    public function _conjunctionScore($docId, $reader)
+    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
     {
         if ($this->_coord === null) {
             $this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
@@ -353,12 +402,16 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
         $score = 0.0;
 
         foreach ($this->_terms as $termId=>$term) {
-            $score += $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
+            /**
+             * We don't need to check that term freq is not 0
+             * Score calculation is performed only for matched docs
+             */
+            $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                       $this->_weights[$termId]->getValue() *
                       $reader->norm($docId, $term->field);
         }
 
-        return $score * $this->_coord;
+        return $score * $this->_coord * $this->getBoost();
     }
 
 
@@ -366,7 +419,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
      * Score calculator for non conjunction queries (not all terms are required)
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
     public function _nonConjunctionScore($docId, $reader)
@@ -390,50 +443,155 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
         $matchedTerms = 0;
         foreach ($this->_terms as $termId=>$term) {
             // Check if term is
-            if ($this->_signs[$termId] !== false &&            // not prohibited
-                isset($this->_termsPositions[$termId][$docId]) // matched
+            if ($this->_signs[$termId] !== false &&        // not prohibited
+                isset($this->_termsFreqs[$termId][$docId]) // matched
                ) {
                 $matchedTerms++;
+
+                /**
+                 * We don't need to check that term freq is not 0
+                 * Score calculation is performed only for matched docs
+                 */
                 $score +=
-                      $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
+                      $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                       $this->_weights[$termId]->getValue() *
                       $reader->norm($docId, $term->field);
             }
         }
 
-        return $score * $this->_coord[$matchedTerms];
+        return $score * $this->_coord[$matchedTerms] * $this->getBoost();
+    }
+
+    /**
+     * Execute query in context of index reader
+     *
+     * Calculates the result set (conjunction variant when no term signs are set,
+     * non-conjunction variant otherwise) and then initializes the query weight.
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    public function execute(Zend_Search_Lucene_Interface $reader)
+    {
+        if ($this->_signs !== null) {
+            // Signs are specified, so not every term is necessarily required
+            $this->_calculateNonConjunctionResult($reader);
+        } else {
+            // No signs at all: plain conjunction of the terms
+            $this->_calculateConjunctionResult($reader);
+        }
+
+        // Initialize weight if it's not done yet
+        $this->_initWeight($reader);
+    }
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations)
+     *
+     * Returns the internal result vector as is.
+     * NOTE(review): the vector appears to be filled by the result calculation
+     * that execute() triggers - confirm before calling this without execute().
+     *
+     * @return array
+     */
+    public function matchedDocs()
+    {
+        return $this->_resVector;
     }
 
     /**
      * Score specified document
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
-    public function score($docId, $reader)
+    public function score($docId, Zend_Search_Lucene_Interface $reader)
     {
-        if($this->_resVector === null) {
+        if (isset($this->_resVector[$docId])) {
             if ($this->_signs === null) {
-                $this->_calculateConjunctionResult($reader);
+                return $this->_conjunctionScore($docId, $reader);
             } else {
-                $this->_calculateNonConjunctionResult($reader);
+                return $this->_nonConjunctionScore($docId, $reader);
             }
+        } else {
+            return 0;
+        }
+    }
+
+    /**
+     * Return query terms
+     *
+     * @return array
+     */
+    public function getQueryTerms()
+    {
+        if ($this->_signs === null) {
+            return $this->_terms;
+        }
+
+        $terms = array();
 
-            $this->_initWeight($reader);
+        foreach ($this->_signs as $id => $sign) {
+            if ($sign !== false) {
+                $terms[] = $this->_terms[$id];
+            }
         }
 
-        if ( (extension_loaded('bitset')) ?
-                bitset_in($this->_resVector, $docId) :
-                isset($this->_resVector[$docId])  ) {
-            if ($this->_signs === null) {
-                return $this->_conjunctionScore($docId, $reader);
-            } else {
-                return $this->_nonConjunctionScore($docId, $reader);
+        return $terms;
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * @param integer &$colorIndex
+     * @param Zend_Search_Lucene_Document_Html $doc
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        $words = array();
+
+        if ($this->_signs === null) {
+            foreach ($this->_terms as $term) {
+                $words[] = $term->text;
             }
         } else {
-            return 0;
+            foreach ($this->_signs as $id => $sign) {
+                if ($sign !== false) {
+                    $words[] = $this->_terms[$id]->text;
+                }
+            }
         }
+
+        $doc->highlight($words, $this->_getHighlightColor($colorIndex));
+    }
+
+    /**
+     * Print a query
+     *
+     * Terms are separated by a space; each one is prefixed with '+' (required)
+     * or '-' (prohibited) and, when it has a field, with "field:". If the boost
+     * differs from 1 the whole string is wrapped as "(...)^boost".
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+
+        $allRequired = ($this->_signs === null);
+        $result      = '';
+
+        foreach ($this->_terms as $termId => $term) {
+            // Separator goes before every term except the one with id 0
+            $prefix = ($termId != 0) ? ' ' : '';
+
+            if ($allRequired || $this->_signs[$termId] === true) {
+                $prefix .= '+';
+            } else if ($this->_signs[$termId] === false) {
+                $prefix .= '-';
+            }
+
+            if ($term->field !== null) {
+                $prefix .= $term->field . ':';
+            }
+
+            $result .= $prefix . $term->text;
+        }
+
+        if ($this->getBoost() != 1) {
+            $result = '(' . $result . ')^' . $this->getBoost();
+        }
+
+        return $result;
     }
 }
 
index b1d40b4bea4b1dcc4e3cda0d354666da47720c85..3e7b782792f35bc7ac2cfc4bc34307070136fdb0 100644 (file)
@@ -15,7 +15,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 /**
  * Zend_Search_Lucene_Search_Query
  */
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
 
 /**
  * Zend_Search_Lucene_Search_Weight_MultiTerm
  */
-require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Phrase.php';
 
 
 /**
@@ -37,7 +37,7 @@ require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
@@ -73,16 +73,14 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
      *
      * The slop is zero by default, requiring exact matches.
      *
-     * @var unknown_type
+     * @var integer
      */
     private $_slop;
 
     /**
      * Result vector.
-     * Bitset or array of document IDs
-     * (depending from Bitset extension availability).
      *
-     * @var mixed
+     * @var array
      */
     private $_resVector = null;
 
@@ -183,6 +181,70 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
     }
 
 
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * - a phrase without terms is replaced by an Empty query;
+     * - a phrase whose first term already has a field is returned unchanged;
+     * - otherwise a Boolean query is built which holds one copy of the phrase
+     *   (same slop, same term offsets) per field name returned by the index.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        if (count($this->_terms) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($this->_terms[0]->field !== null) {
+            // Field is already specified, nothing to expand
+            return $this;
+        }
+
+        $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
+        $booleanQuery->setBoost($this->getBoost());
+
+        $fieldNames = $index->getFieldNames(true);
+        foreach ($fieldNames as $fieldName) {
+            $fieldPhrase = new Zend_Search_Lucene_Search_Query_Phrase();
+            $fieldPhrase->setSlop($this->getSlop());
+
+            // Re-create every term bound to the current field, keeping its offset
+            foreach ($this->_terms as $termId => $term) {
+                $fieldPhrase->addTerm(new Zend_Search_Lucene_Index_Term($term->text, $fieldName),
+                                      $this->_offsets[$termId]);
+            }
+
+            $booleanQuery->addSubquery($fieldPhrase);
+        }
+
+        return $booleanQuery;
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * Returns an Empty query if any phrase term is missing from the index or
+     * the phrase has no terms, a Term query (with the same boost) for a
+     * one-term phrase, and the phrase itself otherwise.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        // Every phrase term has to be present in the index
+        foreach ($this->_terms as $phraseTerm) {
+            if (!$index->hasTerm($phraseTerm)) {
+                return new Zend_Search_Lucene_Search_Query_Empty();
+            }
+        }
+
+        switch (count($this->_terms)) {
+            case 0:
+                return new Zend_Search_Lucene_Search_Query_Empty();
+
+            case 1:
+                // It's one term query
+                $termQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
+                $termQuery->setBoost($this->getBoost());
+
+                return $termQuery;
+
+            default:
+                return $this;
+        }
+    }
+
     /**
      * Returns query term
      *
@@ -209,50 +271,13 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
     /**
      * Constructs an appropriate Weight implementation for this query.
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return Zend_Search_Lucene_Search_Weight
      */
-    protected function _createWeight($reader)
+    public function createWeight(Zend_Search_Lucene_Interface $reader)
     {
-        return new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
-    }
-
-
-    /**
-     * Calculate result vector
-     *
-     * @param Zend_Search_Lucene $reader
-     */
-    private function _calculateResult($reader)
-    {
-        if (extension_loaded('bitset')) {
-            foreach( $this->_terms as $termId=>$term ) {
-                if($this->_resVector === null) {
-                    $this->_resVector = bitset_from_array($reader->termDocs($term));
-                } else {
-                    $this->_resVector = bitset_intersection(
-                                $this->_resVector,
-                                bitset_from_array($reader->termDocs($term)) );
-                }
-
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
-            }
-        } else {
-            foreach( $this->_terms as $termId=>$term ) {
-                if($this->_resVector === null) {
-                    $this->_resVector = array_flip($reader->termDocs($term));
-                } else {
-                    $termDocs = array_flip($reader->termDocs($term));
-                    foreach($this->_resVector as $key=>$value) {
-                        if (!isset( $termDocs[$key] )) {
-                            unset( $this->_resVector[$key] );
-                        }
-                    }
-                }
-
-                $this->_termsPositions[$termId] = $reader->termPositions($term);
-            }
-        }
+        $this->_weight = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
+        return $this->_weight;
     }
 
 
@@ -305,10 +330,10 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
      * Score calculator for sloppy phrase queries (terms sequence is fixed)
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
-    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene $reader)
+    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
     {
         $freq = 0;
 
@@ -377,50 +402,141 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
         return $freq;
     }
 
-
     /**
-     * Score specified document
+     * Execute query in context of index reader
+     * It also initializes necessary internal structures
      *
-     * @param integer $docId
-     * @param Zend_Search_Lucene $reader
-     * @return float
+     * @param Zend_Search_Lucene_Interface $reader
      */
-    public function score($docId, $reader)
+    public function execute(Zend_Search_Lucene_Interface $reader)
     {
-        // optimize zero-term case
+        $this->_resVector = null;
+
         if (count($this->_terms) == 0) {
-            return 0;
+            $this->_resVector = array();
         }
 
-        if($this->_resVector === null) {
-            $this->_calculateResult($reader);
-            $this->_initWeight($reader);
+        foreach( $this->_terms as $termId=>$term ) {
+            if($this->_resVector === null) {
+                $this->_resVector = array_flip($reader->termDocs($term));
+            } else {
+                $this->_resVector = array_intersect_key($this->_resVector, array_flip($reader->termDocs($term)));
+            }
+
+            if (count($this->_resVector) == 0) {
+                // Empty result set, we don't need to check other terms
+                break;
+            }
+
+            $this->_termsPositions[$termId] = $reader->termPositions($term);
         }
 
-        if ( (extension_loaded('bitset')) ?
-                bitset_in($this->_resVector, $docId) :
-                isset($this->_resVector[$docId])  ) {
+        ksort($this->_resVector, SORT_NUMERIC);
+
+        // Initialize weight if it's not done yet
+        $this->_initWeight($reader);
+    }
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations)
+     *
+     * The vector is built by execute() as the key intersection of the flipped
+     * termDocs() lists of all phrase terms, sorted by document id. It is null
+     * until execute() has been called. Documents listed here contain every
+     * term, but score() may still find a zero phrase frequency for them.
+     *
+     * @return array
+     */
+    public function matchedDocs()
+    {
+        return $this->_resVector;
+    }
+
+    /**
+     * Score specified document
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene_Interface $reader
+     * @return float
+     */
+    public function score($docId, Zend_Search_Lucene_Interface $reader)
+    {
+        if (isset($this->_resVector[$docId])) {
             if ($this->_slop == 0) {
                 $freq = $this->_exactPhraseFreq($docId);
             } else {
                 $freq = $this->_sloppyPhraseFreq($docId, $reader);
             }
 
-/*
-            return $reader->getSimilarity()->tf($freq) *
-                   $this->_weight->getValue() *
-                   $reader->norm($docId, reset($this->_terms)->field);
-*/
             if ($freq != 0) {
                 $tf = $reader->getSimilarity()->tf($freq);
                 $weight = $this->_weight->getValue();
                 $norm = $reader->norm($docId, reset($this->_terms)->field);
 
-                return $tf*$weight*$norm;
+                return $tf * $weight * $norm * $this->getBoost();
             }
+
+            // Included in result, but calculated freq is zero
+            return 0;
         } else {
             return 0;
         }
     }
+
+    /**
+     * Return query terms
+     *
+     * Hands back the internal list of phrase terms unchanged.
+     *
+     * @return array
+     */
+    public function getQueryTerms()
+    {
+        return $this->_terms;
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * Gathers the text of every phrase term and passes the list to the document
+     * highlighter together with the color obtained for $colorIndex.
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        $termTexts = array();
+
+        foreach ($this->_terms as $phraseTerm) {
+            array_push($termTexts, $phraseTerm->text);
+        }
+
+        $color = $this->_getHighlightColor($colorIndex);
+        $doc->highlight($termTexts, $color);
+    }
+
+    /**
+     * Print a query
+     *
+     * Format: [field:]"term1 term2 ..."[~slop]
+     * The field is taken from the term with index 0; the slop suffix is added
+     * only when the slop is not zero.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+
+        $fieldPrefix = '';
+        if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
+            $fieldPrefix = $this->_terms[0]->field . ':';
+        }
+
+        $phraseText = '';
+        foreach ($this->_terms as $termId => $term) {
+            // Separator goes before every term except the one with id 0
+            $phraseText .= (($termId != 0) ? ' ' : '') . $term->text;
+        }
+
+        $slopSuffix = ($this->_slop != 0) ? '~' . $this->_slop : '';
+
+        return $fieldPrefix . '"' . $phraseText . '"' . $slopSuffix;
+    }
 }
 
index b0baf0f5aca6b93e5782fee995d0428b5304bc48..0240104e3258dbadc402d67c47a2a6bdf0ccdf82 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Query */
-require_once 'Zend/Search/Lucene/Search/Query.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
 
 /** Zend_Search_Lucene_Search_Weight_Term */
-require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Term.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
@@ -43,31 +43,20 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
      */
     private $_term;
 
-    /**
-     * Term sign.
-     * If true then term is required
-     * If false then term is prohibited.
-     *
-     * @var bool
-     */
-    private $_sign;
-
     /**
      * Documents vector.
-     * Bitset or array of document IDs
-     * (depending from Bitset extension availability).
      *
-     * @var mixed
+     * @var array
      */
     private $_docVector = null;
 
     /**
-     * Term positions vector.
-     * Array: docId => array( pos1, pos2, ... )
+     * Term freqs vector.
+     * array(docId => freq, ...)
      *
      * @var array
      */
-    private $_termPositions;
+    private $_termFreqs;
 
 
     /**
@@ -76,53 +65,160 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
      * @param Zend_Search_Lucene_Index_Term $term
      * @param boolean $sign
      */
-    public function __construct( $term, $sign = true )
+    public function __construct($term)
     {
         $this->_term = $term;
-        $this->_sign = $sign;
+    }
+
+    /**
+     * Re-write query into primitive queries in the context of specified index
+     *
+     * A term which already has a field is returned as is. Otherwise the term
+     * text is combined with every field name returned by the index into a
+     * MultiTerm query (same boost), and that query's own rewrite() result is
+     * returned.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function rewrite(Zend_Search_Lucene_Interface $index)
+    {
+        // Non-strict check: an empty field name is treated like a missing one
+        if ($this->_term->field != null) {
+            return $this;
+        }
+
+        $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
+        $multiTermQuery->setBoost($this->getBoost());
+
+        $fieldNames = $index->getFieldNames(true);
+        foreach ($fieldNames as $fieldName) {
+            $multiTermQuery->addTerm(new Zend_Search_Lucene_Index_Term($this->_term->text, $fieldName));
+        }
+
+        return $multiTermQuery->rewrite($index);
+    }
+
+    /**
+     * Optimize query in the context of specified index
+     *
+     * Returns this query when the index contains the term and an Empty query
+     * otherwise.
+     *
+     * @param Zend_Search_Lucene_Interface $index
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function optimize(Zend_Search_Lucene_Interface $index)
+    {
+        if ($index->hasTerm($this->_term)) {
+            return $this;
+        }
+
+        // Index doesn't contain specified term
+        return new Zend_Search_Lucene_Search_Query_Empty();
     }
 
 
     /**
      * Constructs an appropriate Weight implementation for this query.
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return Zend_Search_Lucene_Search_Weight
      */
-    protected function _createWeight($reader)
+    public function createWeight(Zend_Search_Lucene_Interface $reader)
+    {
+        $this->_weight = new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
+        return $this->_weight;
+    }
+
+    /**
+     * Execute query in context of index reader
+     * It also initializes necessary internal structures
+     *
+     * @param Zend_Search_Lucene_Interface $reader
+     */
+    public function execute(Zend_Search_Lucene_Interface $reader)
     {
-        return new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
+        $this->_docVector = array_flip($reader->termDocs($this->_term));
+        $this->_termFreqs = $reader->termFreqs($this->_term);
+
+        // Initialize weight if it's not done yet
+        $this->_initWeight($reader);
+    }
+
+    /**
+     * Get document ids likely matching the query
+     *
+     * It's an array with document ids as keys (performance considerations)
+     *
+     * The vector is the flipped termDocs() list which execute() stores for the
+     * query term; it is null until execute() has been called.
+     *
+     * @return array
+     */
+    public function matchedDocs()
+    {
+        return $this->_docVector;
     }
 
     /**
      * Score specified document
      *
      * @param integer $docId
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return float
      */
-    public function score( $docId, $reader )
+    public function score($docId, Zend_Search_Lucene_Interface $reader)
     {
-        if($this->_docVector===null) {
-            if (extension_loaded('bitset')) {
-                $this->_docVector = bitset_from_array( $reader->termDocs($this->_term) );
-            } else {
-                $this->_docVector = array_flip($reader->termDocs($this->_term));
-            }
-
-            $this->_termPositions = $reader->termPositions($this->_term);
-            $this->_initWeight($reader);
-        }
-
-        $match = extension_loaded('bitset') ?  bitset_in($this->_docVector, $docId) :
-                                               isset($this->_docVector[$docId]);
-        if ($this->_sign && $match) {
-            return $reader->getSimilarity()->tf(count($this->_termPositions[$docId]) ) *
+        if (isset($this->_docVector[$docId])) {
+            return $reader->getSimilarity()->tf($this->_termFreqs[$docId]) *
                    $this->_weight->getValue() *
-                   $reader->norm($docId, $this->_term->field);
+                   $reader->norm($docId, $this->_term->field) *
+                   $this->getBoost();
         } else {
             return 0;
         }
     }
+
+    /**
+     * Return query terms
+     *
+     * The query holds exactly one term, so it is wrapped into a one-element array.
+     *
+     * @return array
+     */
+    public function getQueryTerms()
+    {
+        return array($this->_term);
+    }
+
+    /**
+     * Return query term
+     *
+     * This is the term object which was passed to the constructor.
+     *
+     * @return Zend_Search_Lucene_Index_Term
+     */
+    public function getTerm()
+    {
+        return $this->_term;
+    }
+
+    /**
+     * Returns query terms
+     *
+     * A term query holds a single term (stored in $_term; the class declares
+     * no $_terms property), so the result is a one-element array, the same
+     * value getQueryTerms() produces.
+     *
+     * @return array
+     */
+    public function getTerms()
+    {
+        return array($this->_term);
+    }
+
+    /**
+     * Highlight query terms
+     *
+     * Passes the term text to the document highlighter together with the
+     * color obtained for $colorIndex.
+     *
+     * @param Zend_Search_Lucene_Document_Html $doc
+     * @param integer &$colorIndex
+     */
+    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
+    {
+        $doc->highlight($this->_term->text, $this->_getHighlightColor($colorIndex));
+    }
+
+    /**
+     * Print a query
+     *
+     * Format: [field:]text - the field prefix is omitted when the term has no field.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // It's used only for query visualisation, so we don't care about characters escaping
+        $fieldPrefix = '';
+
+        if ($this->_term->field !== null) {
+            $fieldPrefix = $this->_term->field . ':';
+        }
+
+        return $fieldPrefix . $this->_term->text;
+    }
 }
 
diff --git a/search/Zend/Search/Lucene/Search/QueryEntry.php b/search/Zend/Search/Lucene/Search/QueryEntry.php
new file mode 100644 (file)
index 0000000..53777c8
--- /dev/null
@@ -0,0 +1,87 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Term.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Phrase */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Phrase.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry_Subquery */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
+
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * Base class of query entries.
+ *
+ * An entry accumulates a boost factor, reacts to the '~' modifier and can be
+ * transformed into a query object.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Accumulated boost factor of the entry (1.0 means "not boosted")
+     *
+     * @var float
+     */
+    protected $_boost = 1.0;
+
+
+    /**
+     * Process the '~' modifier
+     *
+     * @param mixed $parameter  optional modifier argument
+     */
+    abstract public function processFuzzyProximityModifier($parameter = null);
+
+
+    /**
+     * Transform entry to a subquery
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    abstract public function getQuery($encoding);
+
+    /**
+     * Boost query entry
+     *
+     * Factors are multiplied, so repeated calls accumulate.
+     *
+     * @param float $boostFactor
+     */
+    public function boost($boostFactor)
+    {
+        $this->_boost = $this->_boost * $boostFactor;
+    }
+
+
+}
diff --git a/search/Zend/Search/Lucene/Search/QueryEntry/Phrase.php b/search/Zend/Search/Lucene/Search/QueryEntry/Phrase.php
new file mode 100644 (file)
index 0000000..e90a58b
--- /dev/null
@@ -0,0 +1,147 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+
+/**
+ * Query entry for a quoted phrase.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Phrase value
+     *
+     * @var string
+     */
+    private $_phrase;
+
+    /**
+     * Field
+     *
+     * @var string|null
+     */
+    private $_field;
+
+
+    /**
+     * Proximity phrase query
+     *
+     * @var boolean
+     */
+    private $_proximityQuery = false;
+
+    /**
+     * Words distance, used for proximity queries
+     *
+     * @var integer
+     */
+    private $_wordsDistance = 0;
+
+
+    /**
+     * Object constructor
+     *
+     * @param string $phrase
+     * @param string $field
+     */
+    public function __construct($phrase, $field)
+    {
+        $this->_field  = $field;
+        $this->_phrase = $phrase;
+    }
+
+    /**
+     * Process modifier ('~')
+     *
+     * Marks the entry as a proximity query. The words distance is replaced
+     * only when a parameter is actually given.
+     *
+     * @param mixed $parameter
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $this->_proximityQuery = true;
+
+        if ($parameter === null) {
+            return;
+        }
+
+        $this->_wordsDistance = $parameter;
+    }
+
+    /**
+     * Transform entry to a subquery
+     *
+     * The phrase is tokenized with the default analyzer. Zero tokens give an
+     * Empty query, one token gives a Term query, several tokens give a Phrase
+     * query (with slop set for proximity entries). The entry boost is applied
+     * to Term and Phrase queries.
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function getQuery($encoding)
+    {
+        $hasWildcard = strpos($this->_phrase, '?') !== false
+                    || strpos($this->_phrase, '*') !== false;
+
+        if ($hasWildcard) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
+        }
+
+        $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
+        $tokens     = $analyzer->tokenize($this->_phrase, $encoding);
+        $tokenCount = count($tokens);
+
+        if ($tokenCount == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($tokenCount == 1) {
+            $singleTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
+            $termQuery  = new Zend_Search_Lucene_Search_Query_Term($singleTerm);
+            $termQuery->setBoost($this->_boost);
+
+            return $termQuery;
+        }
+
+        // Two or more tokens: build a real phrase query
+        $phraseQuery = new Zend_Search_Lucene_Search_Query_Phrase();
+        foreach ($tokens as $token) {
+            $phraseQuery->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field));
+        }
+
+        if ($this->_proximityQuery) {
+            $phraseQuery->setSlop($this->_wordsDistance);
+        }
+
+        $phraseQuery->setBoost($this->_boost);
+
+        return $phraseQuery;
+    }
+}
diff --git a/search/Zend/Search/Lucene/Search/QueryEntry/Subquery.php b/search/Zend/Search/Lucene/Search/QueryEntry/Subquery.php
new file mode 100644 (file)
index 0000000..8d6e64f
--- /dev/null
@@ -0,0 +1,86 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Already built query wrapped by this entry
+     *
+     * @var Zend_Search_Lucene_Search_Query
+     */
+    private $_query;
+
+    /**
+     * Object constructor
+     *
+     * @param Zend_Search_Lucene_Search_Query $query query to wrap
+     */
+    public function __construct(Zend_Search_Lucene_Search_Query $query)
+    {
+        $this->_query = $query;
+    }
+
+    /**
+     * Process modifier ('~')
+     *
+     * A subquery entry never accepts this modifier, so the call always fails.
+     *
+     * @param mixed $parameter ignored
+     * @throws Zend_Search_Lucene_Search_QueryParserException always
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $message = '\'~\' sign must follow term or phrase';
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+
+
+    /**
+     * Return the wrapped query with this entry's boost applied to it
+     *
+     * @param string $encoding not used by this entry type
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function getQuery($encoding)
+    {
+        $subquery = $this->_query;
+        $subquery->setBoost($this->_boost);
+
+        return $subquery;
+    }
+}
diff --git a/search/Zend/Search/Lucene/Search/QueryEntry/Term.php b/search/Zend/Search/Lucene/Search/QueryEntry/Term.php
new file mode 100644 (file)
index 0000000..f9b3d97
--- /dev/null
@@ -0,0 +1,154 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Search_QueryEntry
+{
+    /**
+     * Term text as given to the constructor
+     *
+     * @var string
+     */
+    private $_term;
+
+    /**
+     * Field name passed to every index term built from this entry
+     *
+     * @var string|null
+     */
+    private $_field;
+
+
+    /**
+     * Set once the '~' modifier has been applied to this term
+     *
+     * @var boolean
+     */
+    private $_fuzzyQuery = false;
+
+    /**
+     * Similarity recorded by the '~' modifier
+     *
+     * @var float
+     */
+    private $_similarity = 1.;
+
+
+    /**
+     * Object constructor
+     *
+     * @param string $term
+     * @param string $field
+     */
+    public function __construct($term, $field)
+    {
+        $this->_field = $field;
+        $this->_term  = $term;
+    }
+
+    /**
+     * Process modifier ('~')
+     *
+     * Marks the entry as a fuzzy query and records the requested similarity,
+     * falling back to 0.5 when the modifier carries no argument.
+     *
+     * @param mixed $parameter
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        $this->_fuzzyQuery = true;
+        $this->_similarity = ($parameter !== null) ? $parameter : 0.5;
+    }
+
+    /**
+     * Build the query object represented by this term entry
+     *
+     * Fuzzy entries and terms containing '?' or '*' are rejected. Otherwise the
+     * term is tokenized with the default analyzer: no tokens produce an empty
+     * query, one token produces a term query, and several tokens produce a
+     * multi-term query in which every token is required.
+     *
+     * @param string $encoding
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function getQuery($encoding)
+    {
+        if ($this->_fuzzyQuery) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported yet.');
+        }
+
+        foreach (array('?', '*') as $wildcard) {
+            if (strpos($this->_term, $wildcard) !== false) {
+                throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard queries are not supported yet.');
+            }
+        }
+
+        $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
+        $tokens     = $analyzer->tokenize($this->_term, $encoding);
+        $tokenCount = count($tokens);
+
+        if ($tokenCount == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if ($tokenCount == 1) {
+            $query = new Zend_Search_Lucene_Search_Query_Term(
+                            new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field));
+        } else {
+            // The analyzer split the term into several tokens
+            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
+
+            /**
+             * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
+             * analyzer design features
+             */
+            foreach ($tokens as $token) {
+                $subterm = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
+                $query->addTerm($subterm, true); // all subterms are required
+            }
+        }
+
+        $query->setBoost($this->_boost);
+
+        return $query;
+    }
+}
index 19ab381fe461bdb37316a8d143eb84ddf0fb3205..12278ea4ad84a40e6bdc3313362a6193df89e525 100644 (file)
@@ -15,7 +15,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_QueryHit
 {
     /**
      * Object handle of the index
-     * @var Zend_Search_Lucene
+     * @var Zend_Search_Lucene_Interface
      */
     protected $_index = null;
 
@@ -55,15 +55,15 @@ class Zend_Search_Lucene_Search_QueryHit
 
 
     /**
-     * Constructor - pass object handle of Zend_Search_Lucene index that produced
+     * Constructor - pass object handle of Zend_Search_Lucene_Interface index that produced
      * the hit so the document can be retrieved easily from the hit.
      *
-     * @param Zend_Search_Lucene $index
+     * @param Zend_Search_Lucene_Interface $index
      */
 
-    public function __construct(Zend_Search_Lucene $index)
+    public function __construct(Zend_Search_Lucene_Interface $index)
     {
-        $this->_index = $index;
+        $this->_index = new Zend_Search_Lucene_Proxy($index);
     }
 
 
@@ -98,7 +98,7 @@ class Zend_Search_Lucene_Search_QueryHit
     /**
      * Return the index object for this hit
      *
-     * @return Zend_Search_Lucene
+     * @return Zend_Search_Lucene_Interface
      */
     public function getIndex()
     {
diff --git a/search/Zend/Search/Lucene/Search/QueryLexer.php b/search/Zend/Search/Lucene/Search/QueryLexer.php
new file mode 100644 (file)
index 0000000..6b72110
--- /dev/null
@@ -0,0 +1,508 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
+{
+    /** State Machine states */
+    const ST_WHITE_SPACE     = 0;
+    const ST_SYNT_LEXEME     = 1;
+    const ST_LEXEME          = 2;
+    const ST_QUOTED_LEXEME   = 3;
+    const ST_ESCAPED_CHAR    = 4;
+    const ST_ESCAPED_QCHAR   = 5;
+    const ST_LEXEME_MODIFIER = 6;
+    const ST_NUMBER          = 7;
+    const ST_MANTISSA        = 8;
+    const ST_ERROR           = 9;
+
+    /** Input symbols */
+    const IN_WHITE_SPACE     = 0;
+    const IN_SYNT_CHAR       = 1;
+    const IN_LEXEME_MODIFIER = 2;
+    const IN_ESCAPE_CHAR     = 3;
+    const IN_QUOTE           = 4;
+    const IN_DECIMAL_POINT   = 5;
+    const IN_ASCII_DIGIT     = 6;
+    const IN_CHAR            = 7;
+    const IN_MUTABLE_CHAR    = 8;
+
+    const QUERY_WHITE_SPACE_CHARS      = " \n\r\t";
+    const QUERY_SYNT_CHARS             = ':()[]{}!|&';
+    const QUERY_MUTABLE_CHARS          = '+-';
+    const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
+    const QUERY_LEXEMEMODIFIER_CHARS   = '~^';
+    const QUERY_ASCIIDIGITS_CHARS      = '0123456789';
+
+    /**
+     * List of recognized lexemes
+     *
+     * @var array
+     */
+    private $_lexemes;
+
+    /**
+     * Query string (array of single- or non single-byte characters)
+     *
+     * @var array
+     */
+    private $_queryString;
+
+    /**
+     * Current position within a query string
+     * Used to create appropriate error messages
+     *
+     * @var integer
+     */
+    private $_queryStringPosition;
+
+    /**
+     * Recognized part of current lexeme
+     *
+     * @var string
+     */
+    private $_currentLexeme;
+
+    /**
+     * Build the lexer state machine
+     *
+     * Registers the states and input symbols with the parent FSM, then the
+     * transition rules (one addRules() call per source state) and finally the
+     * entry/transition actions that collect characters and emit lexemes.
+     *
+     * Rules that carry a fourth element attach an error action to that
+     * {state, input} pair; the named handlers (lexModifierErrException,
+     * quoteWithinLexemeErrException, wrongNumberErrException) are methods of
+     * this class.
+     */
+    public function __construct()
+    {
+        parent::__construct( array(self::ST_WHITE_SPACE,
+                                   self::ST_SYNT_LEXEME,
+                                   self::ST_LEXEME,
+                                   self::ST_QUOTED_LEXEME,
+                                   self::ST_ESCAPED_CHAR,
+                                   self::ST_ESCAPED_QCHAR,
+                                   self::ST_LEXEME_MODIFIER,
+                                   self::ST_NUMBER,
+                                   self::ST_MANTISSA,
+                                   self::ST_ERROR),
+                             array(self::IN_WHITE_SPACE,
+                                   self::IN_SYNT_CHAR,
+                                   self::IN_MUTABLE_CHAR,
+                                   self::IN_LEXEME_MODIFIER,
+                                   self::IN_ESCAPE_CHAR,
+                                   self::IN_QUOTE,
+                                   self::IN_DECIMAL_POINT,
+                                   self::IN_ASCII_DIGIT,
+                                   self::IN_CHAR));
+
+
+        $lexemeModifierErrorAction    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
+        $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
+        $wrongNumberErrorAction       = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');
+
+
+
+        $this->addRules(array( array(self::ST_WHITE_SPACE,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_WHITE_SPACE,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_WHITE_SPACE,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
+                               array(self::ST_WHITE_SPACE,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+                               array(self::ST_WHITE_SPACE,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
+                               array(self::ST_WHITE_SPACE,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
+                               array(self::ST_WHITE_SPACE,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
+                               array(self::ST_WHITE_SPACE,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
+                               array(self::ST_WHITE_SPACE,   self::IN_CHAR,            self::ST_LEXEME)
+                             ));
+        $this->addRules(array( array(self::ST_SYNT_LEXEME,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_SYNT_LEXEME,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_SYNT_LEXEME,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
+                               array(self::ST_SYNT_LEXEME,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+                               array(self::ST_SYNT_LEXEME,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
+                               array(self::ST_SYNT_LEXEME,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
+                               array(self::ST_SYNT_LEXEME,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
+                               array(self::ST_SYNT_LEXEME,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
+                               array(self::ST_SYNT_LEXEME,   self::IN_CHAR,            self::ST_LEXEME)
+                             ));
+        // Inside a lexeme '+' and '-' are ordinary characters, not syntax elements
+        $this->addRules(array( array(self::ST_LEXEME,        self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_LEXEME,        self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_LEXEME,        self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
+                               array(self::ST_LEXEME,        self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+                               array(self::ST_LEXEME,        self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
+
+                               // IN_QUOTE     not allowed
+                               array(self::ST_LEXEME,        self::IN_QUOTE,           self::ST_ERROR, $quoteWithinLexemeErrorAction),
+
+                               array(self::ST_LEXEME,        self::IN_DECIMAL_POINT,   self::ST_LEXEME),
+                               array(self::ST_LEXEME,        self::IN_ASCII_DIGIT,     self::ST_LEXEME),
+                               array(self::ST_LEXEME,        self::IN_CHAR,            self::ST_LEXEME)
+                             ));
+        // Within quotes every input except the escape char and the closing quote stays in the quoted lexeme
+        $this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_QCHAR),
+                               array(self::ST_QUOTED_LEXEME, self::IN_QUOTE,           self::ST_WHITE_SPACE),
+                               array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
+                               array(self::ST_QUOTED_LEXEME, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
+                             ));
+        // Any character following an escape char continues (or starts) a lexeme
+        $this->addRules(array( array(self::ST_ESCAPED_CHAR,  self::IN_WHITE_SPACE,     self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_SYNT_CHAR,       self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_ESCAPE_CHAR,     self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_QUOTE,           self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_DECIMAL_POINT,   self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_ASCII_DIGIT,     self::ST_LEXEME),
+                               array(self::ST_ESCAPED_CHAR,  self::IN_CHAR,            self::ST_LEXEME)
+                             ));
+        $this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR,     self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
+                               array(self::ST_ESCAPED_QCHAR, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
+                             ));
+        $this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
+                               array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+                               // IN_ESCAPE_CHAR       not allowed
+                               array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $lexemeModifierErrorAction),
+
+                               // IN_QUOTE             not allowed
+                               array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE,           self::ST_ERROR, $lexemeModifierErrorAction),
+
+
+                               array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
+                               array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
+
+                               // IN_CHAR              not allowed
+                               array(self::ST_LEXEME_MODIFIER, self::IN_CHAR,            self::ST_ERROR, $lexemeModifierErrorAction),
+                             ));
+        $this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_NUMBER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_NUMBER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
+                               array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+                               // IN_ESCAPE_CHAR       not allowed
+                               array(self::ST_NUMBER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),
+
+                               // IN_QUOTE             not allowed
+                               array(self::ST_NUMBER, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),
+
+                               array(self::ST_NUMBER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
+                               array(self::ST_NUMBER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
+
+                               // IN_CHAR              not allowed
+                               array(self::ST_NUMBER, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
+                             ));
+        $this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
+                               array(self::ST_MANTISSA, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
+                               array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
+                               array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
+
+                               // IN_ESCAPE_CHAR       not allowed
+                               array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),
+
+                               // IN_QUOTE             not allowed
+                               array(self::ST_MANTISSA, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),
+
+                               // IN_DECIMAL_POINT     not allowed
+                               array(self::ST_MANTISSA, self::IN_DECIMAL_POINT,   self::ST_ERROR, $wrongNumberErrorAction),
+
+                               array(self::ST_MANTISSA, self::IN_ASCII_DIGIT,     self::ST_MANTISSA),
+
+                               // IN_CHAR              not allowed
+                               array(self::ST_MANTISSA, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
+                             ));
+
+
+        /** Actions */
+        $syntaxLexemeAction    = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
+        $lexemeModifierAction  = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
+        $addLexemeAction       = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
+        $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
+        $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
+        $addLexemeCharAction   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
+
+
+        /** Syntax lexeme */
+        $this->addEntryAction(self::ST_SYNT_LEXEME,  $syntaxLexemeAction);
+        // Two lexemes in succession
+        $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
+
+
+        /** Lexeme */
+        $this->addEntryAction(self::ST_LEXEME,                       $addLexemeCharAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
+        // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
+
+        // NOTE(review): the rule table above never moves ST_LEXEME to ST_QUOTED_LEXEME,
+        // ST_NUMBER or ST_MANTISSA, so the registrations for those three targets look
+        // unreachable - confirm before removing them.
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE,     $addLexemeAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME,     $addLexemeAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME,   $addLexemeAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER,          $addLexemeAction);
+        $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA,        $addLexemeAction);
+
+
+        /** Quoted lexeme */
+        // We don't need entry action (skip quote)
+        $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
+        $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
+        // Closing quote changes state to the ST_WHITE_SPACE   other states are not used
+        $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE,   $addQuotedLexemeAction);
+
+
+        /** Lexeme modifier */
+        $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
+        // Two modifiers in succession (e.g. '~^'): the state does not change, so, as with
+        // ST_SYNT_LEXEME above, the entry action alone would leave the second modifier
+        // without a token and it would be silently dropped.
+        // NOTE(review): relies on entry actions firing only on a state change - confirm
+        // against Zend_Search_Lucene_FSM::process().
+        $this->addTransitionAction(self::ST_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
+
+
+        /** Number */
+        $this->addEntryAction(self::ST_NUMBER,                           $addLexemeCharAction);
+        $this->addEntryAction(self::ST_MANTISSA,                         $addLexemeCharAction);
+        $this->addTransitionAction(self::ST_NUMBER,   self::ST_NUMBER,   $addLexemeCharAction);
+        // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
+        $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
+
+        $this->addTransitionAction(self::ST_NUMBER,   self::ST_WHITE_SPACE,     $addNumberLexemeAction);
+        $this->addTransitionAction(self::ST_NUMBER,   self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
+        $this->addTransitionAction(self::ST_NUMBER,   self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
+        $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE,     $addNumberLexemeAction);
+        $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
+        $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
+    }
+
+
+
+
+    /**
+     * Map a single query character to the state machine input symbol it belongs to
+     *
+     * The character is first looked up in the QUERY_*_CHARS classes, then compared
+     * with the quote, decimal point and escape characters; anything else is IN_CHAR.
+     *
+     * @param string $char
+     * @return integer one of the IN_* constants
+     */
+    private function _translateInput($char)
+    {
+        // Checked in the listed order; the classes do not share characters
+        $charClasses = array(
+            self::IN_WHITE_SPACE     => self::QUERY_WHITE_SPACE_CHARS,
+            self::IN_SYNT_CHAR       => self::QUERY_SYNT_CHARS,
+            self::IN_MUTABLE_CHAR    => self::QUERY_MUTABLE_CHARS,
+            self::IN_LEXEME_MODIFIER => self::QUERY_LEXEMEMODIFIER_CHARS,
+            self::IN_ASCII_DIGIT     => self::QUERY_ASCIIDIGITS_CHARS
+        );
+
+        foreach ($charClasses as $inputSymbol => $classChars) {
+            if (strpos($classChars, $char) !== false) {
+                return $inputSymbol;
+            }
+        }
+
+        if ($char === '"') {
+            return self::IN_QUOTE;
+        }
+
+        if ($char === '.') {
+            return self::IN_DECIMAL_POINT;
+        }
+
+        if ($char === '\\') {
+            return self::IN_ESCAPE_CHAR;
+        }
+
+        return self::IN_CHAR;
+    }
+
+
+    /**
+     * This method is used to tokenize query string into lexemes
+     *
+     * The string is split into characters of the given encoding, each character
+     * is translated to an input symbol and fed to the state machine. A final
+     * white space symbol is fed afterwards so that a pending lexeme is flushed.
+     *
+     * @param string $inputString
+     * @param string $encoding
+     * @return array list of Zend_Search_Lucene_Search_QueryToken objects
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function tokenize($inputString, $encoding)
+    {
+        $this->reset();
+
+        $this->_lexemes     = array();
+        $this->_queryString = array();
+
+        // A previous call aborted by an exception leaves its partially collected
+        // lexeme behind; clear it so it is not glued to the first lexeme of this query
+        $this->_currentLexeme = '';
+
+        $strLength = iconv_strlen($inputString, $encoding);
+
+        // Workaround for iconv_substr bug
+        $inputString .= ' ';
+
+        for ($count = 0; $count < $strLength; $count++) {
+            $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
+        }
+
+        // The position is a property because actions read it and
+        // addQuerySyntaxLexeme() advances it for two-char lexemes
+        for ($this->_queryStringPosition = 0;
+             $this->_queryStringPosition < count($this->_queryString);
+             $this->_queryStringPosition++) {
+            $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
+        }
+
+        // Terminating white space flushes the lexeme that is still being collected
+        $this->process(self::IN_WHITE_SPACE);
+
+        if ($this->getState() != self::ST_WHITE_SPACE) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
+        }
+
+        $this->_queryString = null;
+
+        return $this->_lexemes;
+    }
+
+
+
+    /*********************************************************************
+     * Actions implementation
+     *
+     * Actions operate on the list of recognized lexemes
+     *********************************************************************/
+
+    /**
+     * Emit a token for the syntax character at the current position
+     *
+     * '|' and '&' must be doubled and are emitted as one two-char lexeme. A ':'
+     * emits no token of its own; it turns the preceding word token into a
+     * field token instead.
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function addQuerySyntaxLexeme()
+    {
+        $lexeme = $this->_queryString[$this->_queryStringPosition];
+
+        // Process two char lexemes
+        $needsSecondChar = (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false);
+        if ($needsSecondChar) {
+            // consume the following character as part of this lexeme
+            $this->_queryStringPosition++;
+
+            $endReached = ($this->_queryStringPosition == count($this->_queryString));
+            if ($endReached  ||  $this->_queryString[$this->_queryStringPosition] != $lexeme) {
+                throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
+            }
+
+            // duplicate character
+            $lexeme .= $lexeme;
+        }
+
+        $token = new Zend_Search_Lucene_Search_QueryToken(
+                                Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
+                                $lexeme,
+                                $this->_queryStringPosition);
+
+        if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
+            // The ':' token is discarded; the previous token is re-added as a field name
+            $token = array_pop($this->_lexemes);
+
+            $isWord = ($token !== null  &&  $token->type == Zend_Search_Lucene_Search_QueryToken::TT_WORD);
+            if (!$isWord) {
+                throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
+            }
+
+            $token->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
+        }
+
+        $this->_lexemes[] = $token;
+    }
+
+    /**
+     * Add lexeme modifier
+     */
+    public function addLexemeModifier()
+    {
+        $position = $this->_queryStringPosition;
+        $modifier = $this->_queryString[$position];
+
+        // The modifier character is recorded as a syntax element token
+        $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
+            $modifier,
+            $position
+        );
+    }
+
+
+    /**
+     * Add lexeme
+     */
+    public function addLexeme()
+    {
+        // The token is positioned one character before the current one
+        $position = $this->_queryStringPosition - 1;
+        $word     = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_WORD,
+            $this->_currentLexeme,
+            $position
+        );
+
+        $this->_lexemes[] = $word;
+
+        // Start collecting the next lexeme from scratch
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Add quoted lexeme
+     */
+    public function addQuotedLexeme()
+    {
+        // The collected text becomes a phrase token at the current position
+        $phrase = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
+            $this->_currentLexeme,
+            $this->_queryStringPosition
+        );
+
+        $this->_lexemes[] = $phrase;
+
+        // Start collecting the next lexeme from scratch
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Add number lexeme
+     */
+    public function addNumberLexeme()
+    {
+        // The token is positioned one character before the current one
+        $position = $this->_queryStringPosition - 1;
+        $number   = new Zend_Search_Lucene_Search_QueryToken(
+            Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
+            $this->_currentLexeme,
+            $position
+        );
+
+        $this->_lexemes[] = $number;
+
+        // Start collecting the next lexeme from scratch
+        $this->_currentLexeme = '';
+    }
+
+    /**
+     * Extend lexeme by one char
+     */
+    public function addLexemeChar()
+    {
+        // Append the character under the cursor to the lexeme being collected
+        $char = $this->_queryString[$this->_queryStringPosition];
+
+        $this->_currentLexeme = $this->_currentLexeme . $char;
+    }
+
+
+    /**
+     * Position message
+     *
+     * @return string
+     */
+    private function _positionMsg()
+    {
+        // Suffix shared by the lexer error messages
+        return sprintf('Position is %s.', $this->_queryStringPosition);
+    }
+
+
+    /*********************************************************************
+     * Syntax errors actions
+     *********************************************************************/
+    public function lexModifierErrException()
+    {
+        // Error action: always throws, reporting the current position
+        $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
+                 . $this->_positionMsg();
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+    public function quoteWithinLexemeErrException()
+    {
+        // Error action: always throws, reporting the current position
+        $message = 'Quote within lexeme must be escaped by \'\\\' char. '
+                 . $this->_positionMsg();
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+    }
+    public function wrongNumberErrException()
+    {
+        // Error action: always throws, reporting the current position.
+        // The trailing space keeps the position suffix separated from the sentence,
+        // matching the other lexer error messages.
+        throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
+    }
+}
+
index 63b6497e0543f2e0d116eb743855ce189bd45514..1a3d5712de820f444c47178013910078765980af 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
-/** Zend_Search_Lucene_Search_QueryTokenizer */
-require_once 'Zend/Search/Lucene/Search/QueryTokenizer.php';
-
 /** Zend_Search_Lucene_Index_Term */
-require_once 'Zend/Search/Lucene/Index/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
 
 /** Zend_Search_Lucene_Search_Query_Term */
-require_once 'Zend/Search/Lucene/Search/Query/Term.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
 
 /** Zend_Search_Lucene_Search_Query_MultiTerm */
-require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+
+/** Zend_Search_Lucene_Search_Query_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
 
 /** Zend_Search_Lucene_Search_Query_Phrase */
-require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+
+/** Zend_Search_Lucene_Search_Query_Empty */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
+
+
+/** Zend_Search_Lucene_Search_QueryLexer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
+
+/** Zend_Search_Lucene_Search_QueryParserContext */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
+
 
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
 
 /** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
-class Zend_Search_Lucene_Search_QueryParser
+class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
 {
+    /**
+     * Parser instance
+     *
+     * @var Zend_Search_Lucene_Search_QueryParser
+     */
+    private static $_instance = null;
+
 
     /**
-     * Parses a query string, returning a Zend_Search_Lucene_Search_Query
+     * Query lexer
+     *
+     * @var Zend_Search_Lucene_Search_QueryLexer
+     */
+    private $_lexer;
+
+    /**
+     * Tokens list
+     * Array of Zend_Search_Lucene_Search_QueryToken objects
+     *
+     * @var array
+     */
+    private $_tokens;
+
+    /**
+     * Current token
+     *
+     * @var Zend_Search_Lucene_Search_QueryToken
+     */
+    private $_currentToken;
+
+    /**
+     * Last token
+     *
+     * It can be processed within FSM states, but this additional state simplifies FSM
+     *
+     * @var Zend_Search_Lucene_Search_QueryToken
+     */
+    private $_lastToken = null;
+
+    /**
+     * Range query first term
+     *
+     * @var string
+     */
+    private $_rqFirstTerm = null;
+
+    /**
+     * Current query parser context
+     *
+     * @var Zend_Search_Lucene_Search_QueryParserContext
+     */
+    private $_context;
+
+    /**
+     * Context stack
+     *
+     * @var array
+     */
+    private $_contextStack;
+
+    /**
+     * Query string encoding
+     *
+     * @var string
+     */
+    private $_encoding;
+
+    /**
+     * Query string default encoding
+     *
+     * @var string
+     */
+    private $_defaultEncoding = '';
+
+
+    /**
+     * Boolean operators constants
+     */
+    const B_OR  = 0;
+    const B_AND = 1;
+
+    /**
+     * Default boolean queries operator
+     *
+     * @var integer
+     */
+    private $_defaultOperator = self::B_OR;
+
+
+    /** Query parser State Machine states */
+    const ST_COMMON_QUERY_ELEMENT       = 0;   // Terms, phrases, operators
+    const ST_CLOSEDINT_RQ_START         = 1;   // Range query start (closed interval) - '['
+    const ST_CLOSEDINT_RQ_FIRST_TERM    = 2;   // First term in '[term1 to term2]' construction
+    const ST_CLOSEDINT_RQ_TO_TERM       = 3;   // 'TO' lexeme in '[term1 to term2]' construction
+    const ST_CLOSEDINT_RQ_LAST_TERM     = 4;   // Second term in '[term1 to term2]' construction
+    const ST_CLOSEDINT_RQ_END           = 5;   // Range query end (closed interval) - ']'
+    const ST_OPENEDINT_RQ_START         = 6;   // Range query start (opened interval) - '{'
+    const ST_OPENEDINT_RQ_FIRST_TERM    = 7;   // First term in '{term1 to term2}' construction
+    const ST_OPENEDINT_RQ_TO_TERM       = 8;   // 'TO' lexeme in '{term1 to term2}' construction
+    const ST_OPENEDINT_RQ_LAST_TERM     = 9;   // Second term in '{term1 to term2}' construction
+    const ST_OPENEDINT_RQ_END           = 10;  // Range query end (opened interval) - '}'
+
+    /**
+     * Parser constructor
+     */
+    public function __construct()
+    {
+        // Hand the FSM base class the list of parser states and the token types
+        // returned by Zend_Search_Lucene_Search_QueryToken::getTypes()
+        parent::__construct(array(self::ST_COMMON_QUERY_ELEMENT,
+                                  self::ST_CLOSEDINT_RQ_START,
+                                  self::ST_CLOSEDINT_RQ_FIRST_TERM,
+                                  self::ST_CLOSEDINT_RQ_TO_TERM,
+                                  self::ST_CLOSEDINT_RQ_LAST_TERM,
+                                  self::ST_CLOSEDINT_RQ_END,
+                                  self::ST_OPENEDINT_RQ_START,
+                                  self::ST_OPENEDINT_RQ_FIRST_TERM,
+                                  self::ST_OPENEDINT_RQ_TO_TERM,
+                                  self::ST_OPENEDINT_RQ_LAST_TERM,
+                                  self::ST_OPENEDINT_RQ_END
+                                 ),
+                            Zend_Search_Lucene_Search_QueryToken::getTypes());
+
+        // Rules are listed as (source state, input token type, target state).
+        // Common query elements keep the parser in the common state; only the
+        // range start tokens leave it.
+        $this->addRules(
+             array(array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_WORD,             self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,           self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FIELD,            self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,         self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,       self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK,  self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK,    self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START, self::ST_CLOSEDINT_RQ_START),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START, self::ST_OPENEDINT_RQ_START),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,   self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,     self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,       self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,        self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,       self::ST_COMMON_QUERY_ELEMENT),
+                   array(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,           self::ST_COMMON_QUERY_ELEMENT)
+                  ));
+        // Closed interval range query: '[' word TO word ']', then back to the common state.
+        // NOTE(review): ST_CLOSEDINT_RQ_END is registered above but no rule here targets it.
+        $this->addRules(
+             array(array(self::ST_CLOSEDINT_RQ_START,      Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_CLOSEDINT_RQ_FIRST_TERM),
+                   array(self::ST_CLOSEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME,      self::ST_CLOSEDINT_RQ_TO_TERM),
+                   array(self::ST_CLOSEDINT_RQ_TO_TERM,    Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_CLOSEDINT_RQ_LAST_TERM),
+                   array(self::ST_CLOSEDINT_RQ_LAST_TERM,  Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END, self::ST_COMMON_QUERY_ELEMENT)
+                  ));
+        // Opened interval range query: '{' word TO word '}', then back to the common state.
+        // NOTE(review): ST_OPENEDINT_RQ_END is registered above but no rule here targets it.
+        $this->addRules(
+             array(array(self::ST_OPENEDINT_RQ_START,      Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_OPENEDINT_RQ_FIRST_TERM),
+                   array(self::ST_OPENEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME,      self::ST_OPENEDINT_RQ_TO_TERM),
+                   array(self::ST_OPENEDINT_RQ_TO_TERM,    Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_OPENEDINT_RQ_LAST_TERM),
+                   array(self::ST_OPENEDINT_RQ_LAST_TERM,  Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END, self::ST_COMMON_QUERY_ELEMENT)
+                  ));
+
+
+
+        // Each action wraps a public method of this parser, referenced by name
+        $addTermEntryAction             = new Zend_Search_Lucene_FSMAction($this, 'addTermEntry');
+        $addPhraseEntryAction           = new Zend_Search_Lucene_FSMAction($this, 'addPhraseEntry');
+        $setFieldAction                 = new Zend_Search_Lucene_FSMAction($this, 'setField');
+        $setSignAction                  = new Zend_Search_Lucene_FSMAction($this, 'setSign');
+        $setFuzzyProxAction             = new Zend_Search_Lucene_FSMAction($this, 'processFuzzyProximityModifier');
+        $processModifierParameterAction = new Zend_Search_Lucene_FSMAction($this, 'processModifierParameter');
+        $subqueryStartAction            = new Zend_Search_Lucene_FSMAction($this, 'subqueryStart');
+        $subqueryEndAction              = new Zend_Search_Lucene_FSMAction($this, 'subqueryEnd');
+        $logicalOperatorAction          = new Zend_Search_Lucene_FSMAction($this, 'logicalOperator');
+        $openedRQFirstTermAction        = new Zend_Search_Lucene_FSMAction($this, 'openedRQFirstTerm');
+        $openedRQLastTermAction         = new Zend_Search_Lucene_FSMAction($this, 'openedRQLastTerm');
+        $closedRQFirstTermAction        = new Zend_Search_Lucene_FSMAction($this, 'closedRQFirstTerm');
+        $closedRQLastTermAction         = new Zend_Search_Lucene_FSMAction($this, 'closedRQLastTerm');
+
+
+        // Input actions for the common state, keyed by token type.
+        // TT_BOOSTING_MARK has a transition above but no input action of its own:
+        // processModifierParameter() inspects the last token when the number arrives.
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_WORD,            $addTermEntryAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,          $addPhraseEntryAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FIELD,           $setFieldAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,        $setSignAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,      $setSignAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, $setFuzzyProxAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,          $processModifierParameterAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,  $subqueryStartAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,    $subqueryEndAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,      $logicalOperatorAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,       $logicalOperatorAction);
+        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,      $logicalOperatorAction);
+
+        // Range query terms are handled by entry actions attached to the term states
+        $this->addEntryAction(self::ST_OPENEDINT_RQ_FIRST_TERM, $openedRQFirstTermAction);
+        $this->addEntryAction(self::ST_OPENEDINT_RQ_LAST_TERM,  $openedRQLastTermAction);
+        $this->addEntryAction(self::ST_CLOSEDINT_RQ_FIRST_TERM, $closedRQFirstTermAction);
+        $this->addEntryAction(self::ST_CLOSEDINT_RQ_LAST_TERM,  $closedRQLastTermAction);
+
+
+
+        // Lexer used by parse() to split a query string into tokens
+        $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
+    }
+
+
+    /**
+     * Set query string default encoding
+     *
+     * @param string $encoding
+     */
+    public static function setDefaultEncoding($encoding)
+    {
+        // The default is stored on the lazily created shared parser instance
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+        $parser->_defaultEncoding = $encoding;
+    }
+
+    /**
+     * Get query string default encoding
+     *
+     * @return string
+     */
+    public static function getDefaultEncoding()
+    {
+        // The default is stored on the lazily created shared parser instance
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+
+        return $parser->_defaultEncoding;
+    }
+
+    /**
+     * Set default boolean operator
+     *
+     * @param integer $operator
+     */
+    public static function setDefaultOperator($operator)
+    {
+        // The default is stored on the lazily created shared parser instance
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+        $parser->_defaultOperator = $operator;
+    }
+
+    /**
+     * Get default boolean operator
+     *
+     * @return integer
+     */
+    public static function getDefaultOperator()
+    {
+        // The default is stored on the lazily created shared parser instance
+        if (null === self::$_instance) {
+            self::$_instance = new self();
+        }
+
+        $parser = self::$_instance;
+
+        return $parser->_defaultOperator;
+    }
+
+    /**
+     * Parses a query string
      *
      * @param string $strQuery
+     * @param string $encoding
      * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException
      */
-    static public function parse($strQuery)
+    public static function parse($strQuery, $encoding = null)
     {
-        $tokens = new Zend_Search_Lucene_Search_QueryTokenizer($strQuery);
+        if (self::$_instance === null) {
+            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
+        }
+
+        // The parser instance is shared between calls. A previous parse that ended
+        // inside a range query, or was aborted by an exception, leaves the state
+        // machine in a non-initial state, so every parse must start from the first state.
+        // NOTE(review): relies on reset() being provided by Zend_Search_Lucene_FSM - confirm.
+        self::$_instance->reset();
+
+        // Re-initialise the per-query state; an explicit $encoding overrides the default one
+        self::$_instance->_encoding     = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
+        self::$_instance->_lastToken    = null;
+        self::$_instance->_context      = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
+        self::$_instance->_contextStack = array();
+        self::$_instance->_tokens       = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
 
         // Empty query
-        if (!$tokens->count()) {
-            throw new Zend_Search_Lucene_Exception('Syntax error: query string cannot be empty.');
+        if (count(self::$_instance->_tokens) == 0) {
+            return new Zend_Search_Lucene_Search_Query_Empty();
         }
 
-        // Term query
-        if ($tokens->count() == 1) {
-            if ($tokens->current()->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
-                return new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($tokens->current()->text, 'contents'));
-            } else {
-                throw new Zend_Search_Lucene_Exception('Syntax error: query string must contain at least one word.');
-            }
-        }
 
+        // Feed the tokens to the state machine one by one; the registered actions build the query
+        foreach (self::$_instance->_tokens as $token) {
+            try {
+                self::$_instance->_currentToken = $token;
+                self::$_instance->process($token->type);
+
+                self::$_instance->_lastToken = $token;
+            } catch (Exception $e) {
+                // A missing transition is recognised by its message text and reported as a syntax error
+                if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
+                    throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+                }
 
-        /**
-         * MultiTerm Query
-         *
-         * Process each token that was returned by the tokenizer.
-         */
-        $terms = array();
-        $signs = array();
-        $prevToken = null;
-        $openBrackets = 0;
-        $field = 'contents';
-        foreach ($tokens as $token) {
-            switch ($token->type) {
-                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD:
-                    $terms[] = new Zend_Search_Lucene_Index_Term($token->text, $field);
-                    $field = 'contents';
-                    if ($prevToken !== null &&
-                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
-                            if ($prevToken->text == "+") {
-                                $signs[] = true;
-                            } else {
-                                $signs[] = false;
-                            }
-                    } else {
-                        $signs[] = null;
-                    }
-                    break;
-                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN:
-                    if ($prevToken !== null &&
-                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
-                            throw new Zend_Search_Lucene_Exception('Syntax error: sign operator must be followed by a word.');
-                    }
-                    break;
-                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD:
-                    $field = $token->text;
-                    // let previous token to be signed as next $prevToken
-                    $token = $prevToken;
-                    break;
-                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET:
-                    $token->text=='(' ? $openBrackets++ : $openBrackets--;
+                throw $e;
             }
-            $prevToken = $token;
         }
 
-        // Finish up parsing: check the last token in the query for an opening sign or parenthesis.
-        if ($prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
-            throw new Zend_Search_Lucene_Exception('Syntax Error: sign operator must be followed by a word.');
+        // A context still on the stack means a subquery was opened but never closed
+        if (count(self::$_instance->_contextStack) != 0) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
         }
 
-        // Finish up parsing: check that every opening bracket has a matching closing bracket.
-        if ($openBrackets != 0) {
-            throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, every opening must have closing.');
+        return self::$_instance->_context->getQuery();
+    }
+
+
+    /*********************************************************************
+     * Actions implementation
+     *
+     * Actions operate on the current parser context as tokens are recognized
+     *********************************************************************/
+
+    /**
+     * Add term to a query
+     */
+    public function addTermEntry()
+    {
+        // The term entry takes its field from the current context
+        $this->_context->addEntry(
+            new Zend_Search_Lucene_Search_QueryEntry_Term($this->_currentToken->text, $this->_context->getField())
+        );
+    }
+
+    /**
+     * Add phrase to a query
+     */
+    public function addPhraseEntry()
+    {
+        // The phrase entry takes its field from the current context
+        $this->_context->addEntry(
+            new Zend_Search_Lucene_Search_QueryEntry_Phrase($this->_currentToken->text, $this->_context->getField())
+        );
+    }
+
+    /**
+     * Set entry field
+     */
+    public function setField()
+    {
+        // The text of the current token is the field name for the next entry
+        $fieldName = $this->_currentToken->text;
+
+        $this->_context->setNextEntryField($fieldName);
+    }
+
+    /**
+     * Set entry sign
+     */
+    public function setSign()
+    {
+        // The sign is passed on as the token type of the current token
+        $signType = $this->_currentToken->type;
+
+        $this->_context->setNextEntrySign($signType);
+    }
+
+
+    /**
+     * Process fuzzy search/proximity modifier - '~'
+     */
+    public function processFuzzyProximityModifier()
+    {
+        // Delegated to the current context; no parameter is passed at this point
+        $context = $this->_context;
+
+        $context->processFuzzyProximityModifier();
+    }
+
+    /**
+     * Process modifier parameter
+     *
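+     * @throws Zend_Search_Lucene_Search_QueryParserException if the number is the first token of the query
+     * @throws Zend_Search_Lucene_Exception if the previous token is not a lexeme modifier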
+     */
+    public function processModifierParameter()
+    {
+        // A number can only be a parameter of the modifier that precedes it
+        if ($this->_lastToken === null) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
         }
 
-        switch (count($terms)) {
-            case 0:
-                throw new Zend_Search_Lucene_Exception('Syntax error: bad term count.');
-            case 1:
-                return new Zend_Search_Lucene_Search_Query_Term($terms[0],$signs[0] !== false);
+        // The kind of the preceding modifier decides how the number is applied
+        switch ($this->_lastToken->type) {
+            case Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK:
+                $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
+                break;
+
+            case Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK:
+                $this->_context->boost($this->_currentToken->text);
+                break;
+
             default:
-                return new Zend_Search_Lucene_Search_Query_MultiTerm($terms,$signs);
+                // It's not a user input exception
+                // Report where the stray number was found instead of an empty position
+                throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position ' . $this->_currentToken->position . '.' );
+        }
+    }
+
+
+    /**
+     * Start subquery
+     */
+    public function subqueryStart()
+    {
+        // Save the enclosing context; the nested one is created with its field
+        $outerContext          = $this->_context;
+        $this->_contextStack[] = $outerContext;
+
+        $this->_context = new Zend_Search_Lucene_Search_QueryParserContext($this->_encoding, $outerContext->getField());
+    }
+
+    /**
+     * End subquery
+     */
+    public function subqueryEnd()
+    {
+        // A closing parenthesis with nothing on the stack has no matching opening one
+        if (count($this->_contextStack) == 0) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
         }
+
+        // Finish the nested query first, then return to the enclosing context
+        $subquery       = $this->_context->getQuery();
+        $this->_context = array_pop($this->_contextStack);
+
+        $entry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery);
+        $this->_context->addEntry($entry);
+    }
+
+    /**
+     * Process logical operator
+     */
+    public function logicalOperator()
+    {
+        // The operator is identified by the type of the current token
+        $operatorType = $this->_currentToken->type;
+
+        $this->_context->addLogicalOperator($operatorType);
+    }
+
+    /**
+     * Process first range query term (opened interval)
+     */
+    public function openedRQFirstTerm()
+    {
+        // Remember the first term text until the last term is reached
+        $firstTermText = $this->_currentToken->text;
+
+        $this->_rqFirstTerm = $firstTermText;
+    }
+
+    /**
+     * Process last range query term (opened interval)
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function openedRQLastTerm()
+    {
+        // Always throws; the commented-out code below is unreachable
+        $message = 'Range queries are not supported yet.';
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+
+        // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
+        // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+
+        // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, false);
+        // $this->_context->addentry($query);
     }
 
+    /**
+     * Process first range query term (closed interval)
+     */
+    public function closedRQFirstTerm()
+    {
+        // Remember the first term text until the last term is reached
+        $firstTermText = $this->_currentToken->text;
+
+        $this->_rqFirstTerm = $firstTermText;
+    }
+
+    /**
+     * Process last range query term (closed interval)
+     *
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function closedRQLastTerm()
+    {
+        // Always throws; the commented-out code below is unreachable
+        $message = 'Range queries are not supported yet.';
+
+        throw new Zend_Search_Lucene_Search_QueryParserException($message);
+
+        // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
+        // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+
+        // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, true);
+        // $this->_context->addentry($query);
+    }
 }
 
diff --git a/search/Zend/Search/Lucene/Search/QueryParserContext.php b/search/Zend/Search/Lucene/Search/QueryParserContext.php
new file mode 100644 (file)
index 0000000..9d172c6
--- /dev/null
@@ -0,0 +1,416 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+/** Zend_Search_Lucene_FSM */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+
+
+/** Zend_Search_Lucene_Index_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Search_Query_Term */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
+
+/** Zend_Search_Lucene_Search_Query_MultiTerm */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+
+/** Zend_Search_Lucene_Search_Query_Boolean */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
+
+/** Zend_Search_Lucene_Search_Query_Phrase */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Search_QueryParserException */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+
+/** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
+
+/** Zend_Search_Lucene_Search_QueryEntry */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryEntry.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryParserContext
+{
+    /**
+     * Default field for the context.
+     *
+     * null means, that term should be searched through all fields
+     * Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries to several
+     *
+     * @var string|null
+     */
+    private $_defaultField;
+
+    /**
+     * Field specified for next entry
+     *
+     * @var string|null
+     */
+    private $_nextEntryField = null;
+
+    /**
+     * True means, that term is required.
+     * False means, that term is prohibited.
+     * null means, that term is neither prohibited, nor required
+     *
+     * @var boolean|null
+     */
+    private $_nextEntrySign = null;
+
+
+    /**
+     * Entries grouping mode
+     */
+    const GM_SIGNS   = 0;  // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
+    const GM_BOOLEAN = 1;  // Boolean operators mode: 'term1 and term2  or  (subquery1) and not (subquery2)'
+
+    /**
+     * Grouping mode
+     *
+     * @var integer
+     */
+    private $_mode = null;
+
+    /**
+     * Entries signs.
+     * Used in GM_SIGNS grouping mode
+     *
+     * @var array
+     */
+    private $_signs = array();
+
+    /**
+     * Query entries
+     * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
+     * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
+     *
+     * @var array
+     */
+    private $_entries = array();
+
+    /**
+     * Query string encoding
+     *
+     * @var string
+     */
+    private $_encoding;
+
+
+    /**
+     * Create a parsing context
+     *
+     * @param string      $encoding      Query string encoding; passed on to the entries' getQuery() calls
+     * @param string|null $defaultField  Field returned by getField() when no field is set for the next entry
+     */
+    public function __construct($encoding, $defaultField = null)
+    {
+        $this->_defaultField = $defaultField;
+        $this->_encoding     = $encoding;
+    }
+
+
+    /**
+     * Get the field which applies to the next entry
+     *
+     * That is the field set through setNextEntryField() if there is one,
+     * and the context default field otherwise.
+     *
+     * @return string|null
+     */
+    public function getField()
+    {
+        if ($this->_nextEntryField === null) {
+            return $this->_defaultField;
+        }
+
+        return $this->_nextEntryField;
+    }
+
+    /**
+     * Set the field for the next entry
+     *
+     * The value is reset to null by addEntry() once an entry has been added.
+     *
+     * @param string $field
+     */
+    public function setNextEntryField($field)
+    {
+        $this->_nextEntryField = $field;
+    }
+
+
+    /**
+     * Set the sign ('required' or 'prohibited') for the next entry
+     *
+     * Switches the context into signs grouping mode.
+     *
+     * @param integer $sign  Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED or TT_PROHIBITED
+     * @throws Zend_Search_Lucene_Search_QueryParserException  if the context is already in boolean mode
+     * @throws Zend_Search_Lucene_Exception                    if the sign type is not recognized
+     */
+    public function setNextEntrySign($sign)
+    {
+        if ($this->_mode === self::GM_BOOLEAN) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
+        }
+
+        $this->_mode = self::GM_SIGNS;
+
+        switch ($sign) {
+            case Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED:
+                $this->_nextEntrySign = true;
+                break;
+
+            case Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED:
+                $this->_nextEntrySign = false;
+                break;
+
+            default:
+                throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
+        }
+    }
+
+
+    /**
+     * Append an entry to the query
+     *
+     * Outside of boolean mode the pending sign is recorded alongside the
+     * entry. The pending field and sign are cleared afterwards.
+     *
+     * @param Zend_Search_Lucene_Search_QueryEntry $entry
+     */
+    public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
+    {
+        $this->_entries[] = $entry;
+
+        // Signs are tracked in every mode except the boolean one
+        if ($this->_mode !== self::GM_BOOLEAN) {
+            $this->_signs[] = $this->_nextEntrySign;
+        }
+
+        // Field and sign settings apply to a single entry only
+        $this->_nextEntrySign  = null;
+        $this->_nextEntryField = null;
+    }
+
+
+    /**
+     * Apply a fuzzy search or proximity search modifier ('~') to the last entry
+     *
+     * @param mixed $parameter  Modifier parameter, forwarded to the entry as is
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function processFuzzyProximityModifier($parameter = null)
+    {
+        // A pending field or sign means the modifier does not directly follow a word or phrase
+        $followsEntry = ($this->_nextEntryField === null  &&  $this->_nextEntrySign === null);
+
+        if (!$followsEntry) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
+        }
+
+        $entry = array_pop($this->_entries);
+
+        if (!($entry instanceof Zend_Search_Lucene_Search_QueryEntry)) {
+            // Entries list is empty or it ends with a boolean operator
+            throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
+        }
+
+        $entry->processFuzzyProximityModifier($parameter);
+
+        // Put the modified entry back at the end of the list
+        array_push($this->_entries, $entry);
+    }
+
+    /**
+     * Apply a boost factor ('^' modifier) to the last entry
+     *
+     * @param float $boostFactor
+     * @throws Zend_Search_Lucene_Search_QueryParserException
+     */
+    public function boost($boostFactor)
+    {
+        // A pending field or sign means the modifier does not directly follow an entry
+        $followsEntry = ($this->_nextEntryField === null  &&  $this->_nextEntrySign === null);
+
+        if (!$followsEntry) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
+        }
+
+        $entry = array_pop($this->_entries);
+
+        if (!($entry instanceof Zend_Search_Lucene_Search_QueryEntry)) {
+            // Entries list is empty or it ends with a boolean operator
+            throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
+        }
+
+        $entry->boost($boostFactor);
+
+        // Put the boosted entry back at the end of the list
+        array_push($this->_entries, $entry);
+    }
+
+    /**
+     * Append a logical operator to the entries list
+     *
+     * Switches the context into boolean grouping mode.
+     *
+     * @param integer $operator
+     * @throws Zend_Search_Lucene_Search_QueryParserException  if the context is already in signs mode
+     */
+    public function addLogicalOperator($operator)
+    {
+        $signsModeActive = ($this->_mode === self::GM_SIGNS);
+
+        if ($signsModeActive) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
+        }
+
+        $this->_mode      = self::GM_BOOLEAN;
+        $this->_entries[] = $operator;
+    }
+
+
+    /**
+     * Build a 'signs style' query from the context entries
+     * '+term1 term2 -term3 +(<subquery1>) ...'
+     *
+     * Entries without an explicit sign get a sign derived from the
+     * query parser default operator.
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function _signStyleExpressionQuery()
+    {
+        $query = new Zend_Search_Lucene_Search_Query_Boolean();
+
+        // true - required (default operator is B_AND), null - optional (otherwise, i.e. B_OR)
+        $andIsDefault = (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND);
+        $defaultSign  = $andIsDefault ? true : null;
+
+        foreach ($this->_entries as $entryId => $entry) {
+            $sign = $this->_signs[$entryId];
+            if ($sign === null) {
+                $sign = $defaultSign;
+            }
+
+            $query->addSubquery($entry->getQuery($this->_encoding), $sign);
+        }
+
+        return $query;
+    }
+
+
+    /**
+     * Generate 'boolean style' query from the context
+     * 'term1 and term2   or   term3 and (<subquery1>) and not (<subquery2>)'
+     *
+     * The result is an empty query when no conjunction with a non-negative
+     * entry remains, the single subquery when exactly one remains, and a
+     * boolean query of the remaining subqueries otherwise.
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Search_QueryParserException  when a Zend_Search_Exception is raised
+     *                                                         while the expression is being recognized
+     * @throws Zend_Search_Lucene_Exception                    on an unknown operator type
+     */
+    private function _booleanExpressionQuery()
+    {
+        /**
+         * We treat each level of an expression as a boolean expression in
+         * a Disjunctive Normal Form
+         *
+         * AND operator has higher precedence than OR
+         *
+         * Thus logical query is a disjunction of one or more conjunctions of
+         * one or more query entries
+         */
+
+        $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
+
+        try {
+            foreach ($this->_entries as $entry) {
+                if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
+                    $expressionRecognizer->processLiteral($entry);
+                } else {
+                    switch ($entry) {
+                        case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
+                            break;
+
+                        case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
+                            break;
+
+                        case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
+                            $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
+                            break;
+
+                        default:
+                            // Only exception objects may be thrown; the index class
+                            // Zend_Search_Lucene is not one of them.
+                            // NOTE(review): if Zend_Search_Lucene_Exception derives from
+                            // Zend_Search_Exception, the catch block below turns this into
+                            // a QueryParserException - confirm against Exception.php.
+                            throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
+                    }
+                }
+            }
+
+            $conjuctions = $expressionRecognizer->finishExpression();
+        } catch (Zend_Search_Exception $e) {
+            // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
+            //                                                          $e->getMessage() . '\'.' );
+            // It's query syntax error message and it should be user friendly. So FSM message is omitted
+            throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
+        }
+
+        // Remove 'only negative' conjunctions
+        // (each conjunction entry is a pair: [0] - query entry, [1] - sign passed to addSubquery())
+        foreach ($conjuctions as $conjuctionId => $conjuction) {
+            $nonNegativeEntryFound = false;
+
+            foreach ($conjuction as $conjuctionEntry) {
+                if ($conjuctionEntry[1]) {
+                    $nonNegativeEntryFound = true;
+                    break;
+                }
+            }
+
+            if (!$nonNegativeEntryFound) {
+               unset($conjuctions[$conjuctionId]);
+            }
+        }
+
+
+        $subqueries = array();
+        foreach ($conjuctions as  $conjuction) {
+            // Check, if it's a one term conjunction
+            if (count($conjuction) == 1) {
+                $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
+            } else {
+                $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
+
+                foreach ($conjuction as $conjuctionEntry) {
+                    $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
+                }
+
+                $subqueries[] = $subquery;
+            }
+        }
+
+        if (count($subqueries) == 0) {
+            // NOTE(review): Query/Empty.php is not among this file's require_once lines;
+            // presumably it is loaded elsewhere - confirm.
+            return new Zend_Search_Lucene_Search_Query_Empty();
+        }
+
+        if (count($subqueries) == 1) {
+            return $subqueries[0];
+        }
+
+
+        $query = new Zend_Search_Lucene_Search_Query_Boolean();
+
+        foreach ($subqueries as $subquery) {
+            // Non-required entry/subquery
+            $query->addSubquery($subquery);
+        }
+
+        return $query;
+    }
+
+    /**
+     * Generate query from current context
+     *
+     * Uses the boolean expression builder in boolean mode and the
+     * signs style builder in any other case.
+     *
+     * @return Zend_Search_Lucene_Search_Query
+     */
+    public function getQuery()
+    {
+        if ($this->_mode !== self::GM_BOOLEAN) {
+            return $this->_signStyleExpressionQuery();
+        }
+
+        return $this->_booleanExpressionQuery();
+    }
+}
diff --git a/search/Zend/Search/Lucene/Search/QueryParserException.php b/search/Zend/Search/Lucene/Search/QueryParserException.php
new file mode 100644 (file)
index 0000000..8ca791f
--- /dev/null
@@ -0,0 +1,40 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Zend_Search_Lucene base exception
+ */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * Query parser exception.
+ *
+ * Special exception type, which may be used to intercept wrong user input
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryParserException extends Zend_Search_Lucene_Exception
+{
+}
+
index 56d3522c711bfe6bde8ce837297fe1e435269e94..cf153096d2a1bc24ad49c5b9014e1304afba6474 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_QueryToken
 {
     /**
-     * Token type Word.
+     * Token types.
      */
-    const TOKTYPE_WORD = 0;
+    const TT_WORD                 = 0;  // Word
+    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
+    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
+    const TT_FIELD_INDICATOR      = 3;  // ':'
+    const TT_REQUIRED             = 4;  // '+'
+    const TT_PROHIBITED           = 5;  // '-'
+    const TT_FUZZY_PROX_MARK      = 6;  // '~'
+    const TT_BOOSTING_MARK        = 7;  // '^'
+    const TT_RANGE_INCL_START     = 8;  // '['
+    const TT_RANGE_INCL_END       = 9;  // ']'
+    const TT_RANGE_EXCL_START     = 10; // '{'
+    const TT_RANGE_EXCL_END       = 11; // '}'
+    const TT_SUBQUERY_START       = 12; // '('
+    const TT_SUBQUERY_END         = 13; // ')'
+    const TT_AND_LEXEME           = 14; // 'AND' or 'and'
+    const TT_OR_LEXEME            = 15; // 'OR'  or 'or'
+    const TT_NOT_LEXEME           = 16; // 'NOT' or 'not'
+    const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
+    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....
 
-    /**
-     * Token type Field.
-     * Field indicator in 'field:word' pair
-     */
-    const TOKTYPE_FIELD = 1;
 
     /**
-     * Token type Sign.
-     * '+' (required) or '-' (absentee) sign
+     * Returns all possible lexeme types.
+     * It's used for syntax analyzer state machine initialization
+     *
+     * @return array
      */
-    const TOKTYPE_SIGN = 2;
+    public static function getTypes()
+    {
+        // Listed in the order of the TT_* constants declaration
+        $lexemeTypes = array(
+            self::TT_WORD,
+            self::TT_PHRASE,
+            self::TT_FIELD,
+            self::TT_FIELD_INDICATOR,
+            self::TT_REQUIRED,
+            self::TT_PROHIBITED,
+            self::TT_FUZZY_PROX_MARK,
+            self::TT_BOOSTING_MARK,
+            self::TT_RANGE_INCL_START,
+            self::TT_RANGE_INCL_END,
+            self::TT_RANGE_EXCL_START,
+            self::TT_RANGE_EXCL_END,
+            self::TT_SUBQUERY_START,
+            self::TT_SUBQUERY_END,
+            self::TT_AND_LEXEME,
+            self::TT_OR_LEXEME,
+            self::TT_NOT_LEXEME,
+            self::TT_TO_LEXEME,
+            self::TT_NUMBER
+        );
+
+        return $lexemeTypes;
+    }
+
 
     /**
-     * Token type Bracket.
-     * '(' or ')'
+     * TokenCategories
      */
-    const TOKTYPE_BRACKET = 3;
+    const TC_WORD           = 0;   // Word
+    const TC_PHRASE         = 1;   // Phrase (one or several quoted words)
+    const TC_NUMBER         = 2;   // Numbers, which are used with syntax elements. Ex. roam~0.8
+    const TC_SYNTAX_ELEMENT = 3;   // +  -  ( )  [ ]  { }  !  ||  && ~ ^
 
 
     /**
@@ -71,34 +111,118 @@ class Zend_Search_Lucene_Search_QueryToken
      */
     public $text;
 
+    /**
+     * Token position within query.
+     *
+     * @var integer
+     */
+    public $position;
+
 
     /**
      * IndexReader constructor needs token type and token text as a parameters.
      *
-     * @param $tokType integer
-     * @param $tokText string
+     * @param integer $tokenCategory
+     * @param string  $tokenText
+     * @param integer $position
      */
-    public function __construct($tokType, $tokText)
+    public function __construct($tokenCategory, $tokenText, $position)
     {
-        switch ($tokType) {
-            case self::TOKTYPE_BRACKET:
-                // fall through to the next case
-            case self::TOKTYPE_FIELD:
-                // fall through to the next case
-            case self::TOKTYPE_SIGN:
-                // fall through to the next case
-            case self::TOKTYPE_WORD:
+        $this->text     = $tokenText;
+        $this->position = $position + 1; // Start from 1
+
+        // Derive the token type (TT_*) from the token category (TC_*) and text
+        switch ($tokenCategory) {
+            case self::TC_WORD:
+                // Operator words are recognized case-insensitively
+                if (  strtolower($tokenText) == 'and') {
+                    $this->type = self::TT_AND_LEXEME;
+                } else if (strtolower($tokenText) == 'or') {
+                    $this->type = self::TT_OR_LEXEME;
+                } else if (strtolower($tokenText) == 'not') {
+                    $this->type = self::TT_NOT_LEXEME;
+                } else if (strtolower($tokenText) == 'to') {
+                    $this->type = self::TT_TO_LEXEME;
+                } else {
+                    $this->type = self::TT_WORD;
+                }
                 break;
-            default:
-                throw new Zend_Search_Lucene_Exception("Unrecognized token type \"$tokType\".");
-        }
 
-        if (!strlen($tokText)) {
-            throw new Zend_Search_Lucene_Exception('Token text must be supplied.');
-        }
+            case self::TC_PHRASE:
+                $this->type = self::TT_PHRASE;
+                break;
+
+            case self::TC_NUMBER:
+                $this->type = self::TT_NUMBER;
+                break;
+
+            case self::TC_SYNTAX_ELEMENT:
+                switch ($tokenText) {
+                    case ':':
+                        $this->type = self::TT_FIELD_INDICATOR;
+                        break;
 
-        $this->type = $tokType;
-        $this->text = $tokText;
+                    case '+':
+                        $this->type = self::TT_REQUIRED;
+                        break;
+
+                    case '-':
+                        $this->type = self::TT_PROHIBITED;
+                        break;
+
+                    case '~':
+                        $this->type = self::TT_FUZZY_PROX_MARK;
+                        break;
+
+                    case '^':
+                        $this->type = self::TT_BOOSTING_MARK;
+                        break;
+
+                    case '[':
+                        $this->type = self::TT_RANGE_INCL_START;
+                        break;
+
+                    case ']':
+                        $this->type = self::TT_RANGE_INCL_END;
+                        break;
+
+                    case '{':
+                        $this->type = self::TT_RANGE_EXCL_START;
+                        break;
+
+                    case '}':
+                        $this->type = self::TT_RANGE_EXCL_END;
+                        break;
+
+                    case '(':
+                        $this->type = self::TT_SUBQUERY_START;
+                        break;
+
+                    case ')':
+                        $this->type = self::TT_SUBQUERY_END;
+                        break;
+
+                    case '!':
+                        $this->type = self::TT_NOT_LEXEME;
+                        break;
+
+                    case '&&':
+                        $this->type = self::TT_AND_LEXEME;
+                        break;
+
+                    case '||':
+                        $this->type = self::TT_OR_LEXEME;
+                        break;
+
+                    default:
+                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
+                }
+                break;
+
+            default:
+                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
+        }
     }
 }
 
index 4fe870bedc1a0716886110e100a7568d5f51a638..2965eaa7538a5bc1adf07e0dc8eb4998fa3d3e46 100644 (file)
 
 
 /** Zend_Search_Lucene_Search_QueryToken */
-require_once 'Zend/Search/Lucene/Search/QueryToken.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryToken.php';
 
 /** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
 
 
 /**
index 74ecb1dda67d6710fe331d02d2cc60ba9b4ccad7..016d232a24486f70951e2b2f42ae1ca1294ccbd3 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Similarity_Default */
-require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity/Default.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 abstract class Zend_Search_Lucene_Search_Similarity
@@ -38,7 +38,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      *
      * @var Zend_Search_Lucene_Search_Similarity
      */
-    static private $_defaultImpl;
+    private static $_defaultImpl;
 
     /**
      * Cache of decoded bytes.
@@ -46,7 +46,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      *
      * @var array
      */
-    static private $_normTable = array( 0   => 0.0,
+    private static $_normTable = array( 0   => 0.0,
                                         1   => 5.820766E-10,
                                         2   => 6.9849193E-10,
                                         3   => 8.1490725E-10,
@@ -310,7 +310,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      *
      * @param Zend_Search_Lucene_Search_Similarity $similarity
      */
-    static public function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
+    public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
     {
         self::$_defaultImpl = $similarity;
     }
@@ -322,7 +322,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      *
      * @return Zend_Search_Lucene_Search_Similarity
      */
-    static public function getDefault()
+    public static function getDefault()
     {
         if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
             self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
@@ -381,7 +381,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      * @param integer $byte
      * @return float
      */
-    static public function decodeNorm($byte)
+    public static function decodeNorm($byte)
     {
         return self::$_normTable[$byte & 0xFF];
     }
@@ -412,7 +412,7 @@ abstract class Zend_Search_Lucene_Search_Similarity
      * @param integer $b
      * @return float
      */
-    static private function _floatToByte($f)
+    private static function _floatToByte($f)
     {
         // round negatives up to zero
         if ($f <= 0.0) {
@@ -495,10 +495,10 @@ abstract class Zend_Search_Lucene_Search_Similarity
      * Returns a score factor for the term
      *
      * @param mixed $input
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface $reader
      * @return a score factor for the term
      */
-    public function idf($input, $reader)
+    public function idf($input, Zend_Search_Lucene_Interface $reader)
     {
         if (!is_array($input)) {
             return $this->idfFreq($reader->docFreq($input), $reader->count());
index 6cafb59668d54887e5c431060794d5928a9ccdb8..8263f2a72f8347495cdd6f54548c714e6c570dde 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
+/** Zend_Search_Lucene_Search_Similarity */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
+
+
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
index 248f5cb2b41b2edb6d69e9152a6b1e89002f00f2..2faba2861bf620f36d1af6cc35cdc917a4b3d60f 100644 (file)
@@ -15,7 +15,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 abstract class Zend_Search_Lucene_Search_Weight
 {
+    /**
+     * Normalization factor.
+     * This value is stored only for query explanation purpose and not used in any other place
+     *
+     * @var float
+     */
+    protected $_queryNorm;
+
+    /**
+     * Weight value
+     *
+     * Weight value may be initialized in sumOfSquaredWeights() or normalize()
+     * because they both are invoked either in Query::_initWeight (for top-level query) or
+     * in corresponding methods of parent query's weights
+     *
+     * @var float
+     */
+    protected $_value;
+
+
     /**
      * The weight for this query.
      *
      * @return float
      */
-    abstract public function getValue();
+    public function getValue()
+    {
+        return $this->_value;
+    }
 
     /**
      * The sum of squared weights of contained query clauses.
diff --git a/search/Zend/Search/Lucene/Search/Weight/Boolean.php b/search/Zend/Search/Lucene/Search/Weight/Boolean.php
new file mode 100644 (file)
index 0000000..7a42ed2
--- /dev/null
@@ -0,0 +1,136 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Boolean extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * IndexReader.
+     *
+     * @var Zend_Search_Lucene_Interface
+     */
+    private $_reader;
+
+    /**
+     * The query that this concerns.
+     *
+     * @var Zend_Search_Lucene_Search_Query
+     */
+    private $_query;
+
+    /**
+     * Queries weights
+     * Array of Zend_Search_Lucene_Search_Weight
+     *
+     * @var array
+     */
+    private $_weights;
+
+
+    /**
+     * Zend_Search_Lucene_Search_Weight_Boolean constructor
+     * query - the query that this concerns.
+     * reader - index reader
+     *
+     * @param Zend_Search_Lucene_Search_Query $query
+     * @param Zend_Search_Lucene_Interface    $reader
+     */
+    public function __construct(Zend_Search_Lucene_Search_Query $query,
+                                Zend_Search_Lucene_Interface    $reader)
+    {
+        $this->_query   = $query;
+        $this->_reader  = $reader;
+        $this->_weights = array();
+
+        $signs = $query->getSigns();
+
+        foreach ($query->getSubqueries() as $num => $subquery) {
+            if ($signs === null || $signs[$num] === null || $signs[$num]) {
+                $this->_weights[$num] = $subquery->createWeight($reader);
+            }
+        }
+    }
+
+
+    /**
+     * The weight for this query
+     * Standard Weight::$_value is not used for boolean queries
+     *
+     * @return float
+     */
+    public function getValue()
+    {
+        return $this->_query->getBoost();
+    }
+
+
+    /**
+     * The sum of squared weights of contained query clauses.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        $sum = 0;
+        foreach ($this->_weights as $weight) {
+            // sum sub weights
+            $sum += $weight->sumOfSquaredWeights();
+        }
+
+        // boost each sub-weight
+        $sum *= $this->_query->getBoost() * $this->_query->getBoost();
+
+        // check for empty query (like '-something -another')
+        if ($sum == 0) {
+            $sum = 1.0;
+        }
+        return $sum;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        // incorporate boost
+        $queryNorm *= $this->_query->getBoost();
+
+        foreach ($this->_weights as $weight) {
+            $weight->normalize($queryNorm);
+        }
+    }
+}
+
+
diff --git a/search/Zend/Search/Lucene/Search/Weight/Empty.php b/search/Zend/Search/Lucene/Search/Weight/Empty.php
new file mode 100644 (file)
index 0000000..00c1bce
--- /dev/null
@@ -0,0 +1,56 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Empty extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * The sum of squared weights of contained query clauses.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        return 1;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+    }
+}
+
index 448bb064ebcfd6274a6d8358a90931a8231e9dc4..122cfa4035a1d523b869b7fa94eb7cc66156eed3 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Weight */
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Search_Weight
@@ -36,14 +36,14 @@ class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Sear
     /**
      * IndexReader.
      *
-     * @var Zend_Search_Lucene
+     * @var Zend_Search_Lucene_Interface
      */
     private $_reader;
 
     /**
      * The query that this concerns.
      *
-     * @var Zend_Search_Lucene_Search_Query_MultiTerm
+     * @var Zend_Search_Lucene_Search_Query
      */
     private $_query;
 
@@ -61,10 +61,11 @@ class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Sear
      * query - the query that this concerns.
      * reader - index reader
      *
-     * @param Zend_Search_Lucene_Search_Query_MultiTerm $query
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Search_Query $query
+     * @param Zend_Search_Lucene_Interface    $reader
      */
-    public function __construct($query, $reader)
+    public function __construct(Zend_Search_Lucene_Search_Query $query,
+                                Zend_Search_Lucene_Interface    $reader)
     {
         $this->_query   = $query;
         $this->_reader  = $reader;
@@ -72,10 +73,10 @@ class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Sear
 
         $signs = $query->getSigns();
 
-        foreach ($query->getTerms() as $num => $term) {
-            if ($signs === null || $signs[$num] === null || $signs[$num]) {
-                $this->_weights[$num] = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
-                $query->setWeight($num, $this->_weights[$num]);
+        foreach ($query->getTerms() as $id => $term) {
+            if ($signs === null || $signs[$id] === null || $signs[$id]) {
+                $this->_weights[$id] = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
+                $query->setWeight($id, $this->_weights[$id]);
             }
         }
     }
@@ -83,6 +84,7 @@ class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Sear
 
     /**
      * The weight for this query
+     * Standard Weight::$_value is not used for boolean queries
      *
      * @return float
      */
index 536659614cb1fe5dff044f09b6e8ee42ce621578..7faaa7ad130ea99a3cbe4f964ec525c23590fe94 100644 (file)
@@ -15,7 +15,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 /**
  * Zend_Search_Lucene_Search_Weight
  */
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_Weight
@@ -38,7 +38,7 @@ class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_
     /**
      * IndexReader.
      *
-     * @var Zend_Search_Lucene
+     * @var Zend_Search_Lucene_Interface
      */
     private $_reader;
 
@@ -49,13 +49,6 @@ class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_
      */
     private $_query;
 
-    /**
-     * Weight value
-     *
-     * @var float
-     */
-    private $_value;
-
     /**
      * Score factor
      *
@@ -63,46 +56,19 @@ class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_
      */
     private $_idf;
 
-    /**
-     * Normalization factor
-     *
-     * @var float
-     */
-    private $_queryNorm;
-
-
-    /**
-     * Query weight
-     *
-     * @var float
-     */
-    private $_queryWeight;
-
-
     /**
      * Zend_Search_Lucene_Search_Weight_Phrase constructor
      *
      * @param Zend_Search_Lucene_Search_Query_Phrase $query
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Interface           $reader
      */
-    public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query, Zend_Search_Lucene $reader)
+    public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query,
+                                Zend_Search_Lucene_Interface           $reader)
     {
         $this->_query  = $query;
         $this->_reader = $reader;
     }
 
-
-    /**
-     * The weight for this query
-     *
-     * @return float
-     */
-    public function getValue()
-    {
-        return $this->_value;
-    }
-
-
     /**
      * The sum of squared weights of contained query clauses.
      *
index d502896a5b72882a892289c6e23f45f023579a51..478cb2a8058c1fe7ff18ec6a20887c03a7a4d3f6 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Weight */
-require_once 'Zend/Search/Lucene/Search/Weight.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
@@ -36,7 +36,7 @@ class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_We
     /**
      * IndexReader.
      *
-     * @var Zend_Search_Lucene
+     * @var Zend_Search_Lucene_Interface
      */
     private $_reader;
 
@@ -54,13 +54,6 @@ class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_We
      */
     private $_query;
 
-    /**
-     * Weight value
-     *
-     * @var float
-     */
-    private $_value;
-
     /**
      * Score factor
      *
@@ -68,14 +61,6 @@ class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_We
      */
     private $_idf;
 
-    /**
-     * Normalization factor
-     *
-     * @var float
-     */
-    private $_queryNorm;
-
-
     /**
      * Query weight
      *
@@ -88,9 +73,13 @@ class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_We
      * Zend_Search_Lucene_Search_Weight_Term constructor
      * reader - index reader
      *
-     * @param Zend_Search_Lucene $reader
+     * @param Zend_Search_Lucene_Index_Term   $term
+     * @param Zend_Search_Lucene_Search_Query $query
+     * @param Zend_Search_Lucene_Interface    $reader
      */
-    public function __construct($term, $query, $reader)
+    public function __construct(Zend_Search_Lucene_Index_Term   $term,
+                                Zend_Search_Lucene_Search_Query $query,
+                                Zend_Search_Lucene_Interface    $reader)
     {
         $this->_term   = $term;
         $this->_query  = $query;
@@ -98,17 +87,6 @@ class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_We
     }
 
 
-    /**
-     * The weight for this query
-     *
-     * @return float
-     */
-    public function getValue()
-    {
-        return $this->_value;
-    }
-
-
     /**
      * The sum of squared weights of contained query clauses.
      *
index 01ea380e3c8e30dbdd827e4b7c21ac5696612f98..67e2b295f528de5b860770936a37501f31430e9b 100644 (file)
@@ -15,7 +15,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
@@ -24,7 +24,7 @@
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 abstract class Zend_Search_Lucene_Storage_Directory
@@ -111,10 +111,16 @@ abstract class Zend_Search_Lucene_Storage_Directory
     /**
      * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
      *
+     * If the $shareHandler option is true, the file handler can be shared between File Object
+     * requests. This speeds up performance, but causes problems with the file position.
+     * Shared handlers are good for short atomic requests.
+     * Non-shared handlers are useful for stream file reading (especially for compound files).
+     *
      * @param string $filename
+     * @param boolean $shareHandler
      * @return Zend_Search_Lucene_Storage_File
      */
-    abstract public function getFileObject($filename);
+    abstract public function getFileObject($filename, $shareHandler = true);
 
 }
 
index 8d675c35b5d9367ea504846ff34b34eea8ecd9e2..7ac76810ab6a5d1d095f0becad20169c2cf15521 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Storage_Directory */
-require_once 'Zend/Search/Lucene/Storage/Directory.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory.php';
 
 /** Zend_Search_Lucene_Storage_File_Filesystem */
-require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Filesystem.php';
 
 
 /**
@@ -33,7 +33,7 @@ require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene_Storage_Directory
@@ -64,7 +64,7 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
      * @return boolean
      */
 
-    static public function mkdirs($dir, $mode = 0777, $recursive = true)
+    public static function mkdirs($dir, $mode = 0777, $recursive = true)
     {
         if (is_null($dir) || $dir === '') {
             return false;
@@ -113,7 +113,7 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
             $fileObject->close();
         }
 
-        unset($this->_fileHandlers);
+        $this->_fileHandlers = array();
     }
 
 
@@ -127,15 +127,14 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
         $result = array();
 
         $dirContent = opendir( $this->_dirPath );
-        while ($file = readdir($dirContent)) {
+        while (($file = readdir($dirContent)) !== false) {
             if (($file == '..')||($file == '.'))   continue;
 
-            $fullName = $this->_dirPath . '/' . $file;
-
             if( !is_dir($this->_dirPath . '/' . $file) ) {
                 $result[] = $file;
             }
         }
+        closedir($dirContent);
 
         return $result;
     }
@@ -165,11 +164,17 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
      */
     public function deleteFile($filename)
     {
+        /**
+         * @todo add support of "deletable" file
+         * "deletable" is used on Windows systems if file can't be deleted
+         * (while it is still open).
+         */
+
         if (isset($this->_fileHandlers[$filename])) {
             $this->_fileHandlers[$filename]->close();
         }
         unset($this->_fileHandlers[$filename]);
-        unlink($this->_dirPath .'/'. $filename);
+        unlink($this->_dirPath . '/' . $filename);
     }
 
 
@@ -219,24 +224,40 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
      * @param string $from
      * @param string $to
      * @return void
+     * @throws Zend_Search_Lucene_Exception
      */
     public function renameFile($from, $to)
     {
-        if ($this->_fileHandlers[$from] !== null) {
+        global $php_errormsg;
+
+        if (isset($this->_fileHandlers[$from])) {
             $this->_fileHandlers[$from]->close();
         }
         unset($this->_fileHandlers[$from]);
 
-        if ($this->_fileHandlers[$to] !== null) {
+        if (isset($this->_fileHandlers[$to])) {
             $this->_fileHandlers[$to]->close();
         }
         unset($this->_fileHandlers[$to]);
 
         if (file_exists($this->_dirPath . '/' . $to)) {
-            unlink($this->_dirPath . '/' . $to);
+            if (!unlink($this->_dirPath . '/' . $to)) {
+                throw new Zend_Search_Lucene_Exception('Delete operation failed');
+            }
+        }
+
+        $trackErrors = ini_get('track_errors');
+        ini_set('track_errors', '1');
+
+        $success = @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
+        if (!$success) {
+            ini_set('track_errors', $trackErrors);
+            throw new Zend_Search_Lucene_Exception($php_errormsg);
         }
 
-        return @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
+        ini_set('track_errors', $trackErrors);
+
+        return $success;
     }
 
 
@@ -255,17 +276,29 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
     /**
      * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
      *
+     * If the $shareHandler option is true, the file handler can be shared between File Object
+     * requests. This speeds up performance, but causes problems with the file position.
+     * Shared handlers are good for short atomic requests.
+     * Non-shared handlers are useful for stream file reading (especially for compound files).
+     *
      * @param string $filename
+     * @param boolean $shareHandler
      * @return Zend_Search_Lucene_Storage_File
      */
-    public function getFileObject($filename)
+    public function getFileObject($filename, $shareHandler = true)
     {
+        $fullFilename = $this->_dirPath . '/' . $filename;
+
+        if (!$shareHandler) {
+            return new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
+        }
+
         if (isset( $this->_fileHandlers[$filename] )) {
             $this->_fileHandlers[$filename]->seek(0);
             return $this->_fileHandlers[$filename];
         }
 
-        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename);
+        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
         return $this->_fileHandlers[$filename];
     }
 }
index 5a195ae85f4063f91111485b2d6b5c5d321a6c66..8370c719db7047bc337fb3f3a906dba89c400049 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 
 /** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 abstract class Zend_Search_Lucene_Storage_File
@@ -69,6 +69,15 @@ abstract class Zend_Search_Lucene_Storage_File
      */
     abstract public function tell();
 
+    /**
+     * Flush output.
+     *
+     * Returns true on success or false on failure.
+     *
+     * @return boolean
+     */
+    abstract public function flush();
+
     /**
      * Writes $length number of bytes (all, if $length===null) to the end
      * of the file.
@@ -78,6 +87,20 @@ abstract class Zend_Search_Lucene_Storage_File
      */
     abstract protected function _fwrite($data, $length=null);
 
+    /**
+     * Lock file
+     *
+     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
+     *
+     * @param integer $lockType
+     * @return boolean
+     */
+    abstract public function lock($lockType, $nonBlockinLock = false);
+
+    /**
+     * Unlock file
+     */
+    abstract public function unlock();
 
     /**
      * Reads a byte from the current position in the file
@@ -401,4 +424,4 @@ abstract class Zend_Search_Lucene_Storage_File
     {
         return $this->_fread($this->readVInt());
     }
-}
\ No newline at end of file
+}
index 7c33543dd6e27ebd8724b064ee6c4275613b739a..1f2097eb4f67d7eab5dacb512a1db7b9ce1d826b 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Storage_File */
-require_once 'Zend/Search/Lucene/Storage/File.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File.php';
 
 /** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Storage
- * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Storage_File
@@ -53,12 +53,12 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
     {
         global $php_errormsg;
 
-        $trackErrors = ini_get( "track_errors");
+        $trackErrors = ini_get('track_errors');
         ini_set('track_errors', '1');
 
         $this->_fileHandle = @fopen($filename, $mode);
 
-        if ($this->_fileHandle===false) {
+        if ($this->_fileHandle === false) {
             ini_set('track_errors', $trackErrors);
             throw new Zend_Search_Lucene_Exception($php_errormsg);
         }
@@ -100,6 +100,17 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
         return ftell($this->_fileHandle);
     }
 
+    /**
+     * Flush output.
+     *
+     * Returns true on success or false on failure.
+     *
+     * @return boolean
+     */
+    public function flush()
+    {
+        return fflush($this->_fileHandle);
+    }
 
     /**
      * Close File object
@@ -167,5 +178,39 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
             fwrite($this->_fileHandle, $data, $length);
         }
     }
+
+    /**
+     * Lock file
+     *
+     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
+     *
+     * @param integer $lockType
+     * @param boolean $nonBlockinLock
+     * @return boolean
+     */
+    public function lock($lockType, $nonBlockinLock = false)
+    {
+        if ($nonBlockinLock) {
+            return flock($this->_fileHandle, $lockType | LOCK_NB);
+        } else {
+            return flock($this->_fileHandle, $lockType);
+        }
+    }
+
+    /**
+     * Unlock file
+     *
+     * Returns true on success
+     *
+     * @return boolean
+     */
+    public function unlock()
+    {
+        if ($this->_fileHandle !== null ) {
+            return flock($this->_fileHandle, LOCK_UN);
+        } else {
+            return true;
+        }
+    }
 }
 
diff --git a/search/Zend/Search/Lucene/Storage/File/Memory.php b/search/Zend/Search/Lucene/Storage/File/Memory.php
new file mode 100644 (file)
index 0000000..e3567c6
--- /dev/null
@@ -0,0 +1,555 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Storage_File */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
+{
+    /**
+     * FileData
+     *
+     * @var string
+     */
+    private $_data;
+
+    /**
+     * File Position
+     *
+     * @var integer
+     */
+    private $_position = 0;
+
+
+    /**
+     * Object constructor
+     *
+     * Stores $data as the content of the in-memory file. The position
+     * property keeps its declared default of 0.
+     *
+     * @param string $data
+     */
+    public function __construct($data)
+    {
+        $this->_data = $data;
+    }
+
+    /**
+     * Returns up to $length bytes starting at the current position and
+     * moves the position forward by $length.
+     *
+     * The position is advanced by the requested length even when fewer
+     * bytes are available in the buffer.
+     *
+     * @param integer $length
+     * @return string
+     */
+    protected function _fread($length = 1)
+    {
+        $start = $this->_position;
+        $this->_position = $start + $length;
+
+        return substr($this->_data, $start, $length);
+    }
+
+
+    /**
+     * Sets the file position indicator.
+     * The new position, measured in bytes from the beginning of the file,
+     * is obtained by adding offset to the position specified by whence,
+     * whose values are defined as follows:
+     * SEEK_SET - Set position equal to offset bytes.
+     * SEEK_CUR - Set position to current location plus offset.
+     * SEEK_END - Set position to end-of-file plus offset. (To move to
+     * a position before the end-of-file, you need to pass a negative value
+     * in offset.)
+     * Upon success, returns 0; otherwise (unknown $whence), returns -1 and
+     * leaves the position untouched.
+     *
+     * @param integer $offset
+     * @param integer $whence
+     * @return integer
+     */
+    public function seek($offset, $whence=SEEK_SET)
+    {
+        switch ($whence) {
+            case SEEK_SET:
+                $this->_position = $offset;
+                break;
+
+            case SEEK_CUR:
+                $this->_position += $offset;
+                break;
+
+            case SEEK_END:
+                $this->_position = strlen($this->_data) + $offset;
+                break;
+
+            default:
+                // Unsupported $whence: report failure as documented
+                return -1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * Get file position.
+     *
+     * Returns the current value of the internal position counter, i.e. the
+     * byte offset into the in-memory data.
+     *
+     * @return integer
+     */
+    public function tell()
+    {
+        return $this->_position;
+    }
+
+    /**
+     * Flush output.
+     *
+     * This implementation performs no work and always returns true.
+     *
+     * @return boolean
+     */
+    public function flush()
+    {
+        // Do nothing
+
+        return true;
+    }
+
+    /**
+     * Appends the first $length bytes of $data (all of it when
+     * $length === null) to the end of the in-memory file and moves the
+     * position to the new end.
+     *
+     * @param string $data
+     * @param integer $length
+     */
+    protected function _fwrite($data, $length=null)
+    {
+        // Append-only: the current position is not consulted.
+        $chunk = ($length === null) ? $data : substr($data, 0, $length);
+
+        $this->_data    .= $chunk;
+        $this->_position = strlen($this->_data);
+    }
+
+    /**
+     * Lock file
+     *
+     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
+     *
+     * Both arguments are ignored by this implementation; it always
+     * returns true.
+     *
+     * @param integer $lockType
+     * @param boolean $nonBlockinLock
+     * @return boolean
+     */
+    public function lock($lockType, $nonBlockinLock = false)
+    {
+        // Memory files can't be shared
+        // do nothing
+
+        return true;
+    }
+
+    /**
+     * Unlock file
+     *
+     * Nothing is ever locked for a memory file, so this always succeeds.
+     *
+     * @return boolean always true
+     */
+    public function unlock()
+    {
+        // Memory files can't be shared
+        // do nothing
+
+        return true;
+    }
+
+    /**
+     * Returns the ordinal value of the byte at the current position and
+     * moves the position one byte forward.
+     *
+     * @return integer
+     */
+    public function readByte()
+    {
+        $offset = $this->_position;
+        $this->_position = $offset + 1;
+
+        return ord($this->_data[$offset]);
+    }
+
+    /**
+     * Appends a single byte to the end of the file and moves the position
+     * to the new end.
+     *
+     * @param integer $byte
+     * @return integer always 1
+     */
+    public function writeByte($byte)
+    {
+        // Append-only: the current position is not consulted.
+        $encoded = chr($byte);
+
+        $this->_data    .= $encoded;
+        $this->_position = strlen($this->_data);
+
+        return 1;
+    }
+
+    /**
+     * Returns up to $num bytes starting at the current position and moves
+     * the position forward by $num.
+     *
+     * @param integer $num
+     * @return string
+     */
+    public function readBytes($num)
+    {
+        $start = $this->_position;
+        $this->_position = $start + $num;
+
+        return substr($this->_data, $start, $num);
+    }
+
+    /**
+     * Appends the first $num bytes of $data (all of it when $num === null)
+     * to the end of the in-memory file and moves the position to the new
+     * end.
+     *
+     * @param string $data
+     * @param integer $num
+     */
+    public function writeBytes($data, $num=null)
+    {
+        // Append-only: the current position is not consulted.
+        $chunk = ($num === null) ? $data : substr($data, 0, $num);
+
+        $this->_data    .= $chunk;
+        $this->_position = strlen($this->_data);
+    }
+
+
+    /**
+     * Reads an integer from the current position in the file
+     * and advances the file pointer.
+     *
+     * The four bytes are combined most-significant byte first.
+     *
+     * @return integer
+     */
+    public function readInt()
+    {
+        $str = substr($this->_data, $this->_position, 4);
+        $this->_position += 4;
+
+        // Square-bracket offsets: the curly-brace form is rejected by PHP 8
+        return  ord($str[0]) << 24 |
+                ord($str[1]) << 16 |
+                ord($str[2]) << 8  |
+                ord($str[3]);
+    }
+
+
+    /**
+     * Appends $value to the end of the file as four bytes,
+     * most-significant byte first, and moves the position to the new end.
+     *
+     * @param integer $value
+     */
+    public function writeInt($value)
+    {
+        // Append-only: the current position is not consulted.
+        settype($value, 'integer');
+
+        $encoded = '';
+        for ($shift = 24; $shift >= 0; $shift -= 8) {
+            $encoded .= chr(($value >> $shift) & 0xFF);
+        }
+
+        $this->_data    .= $encoded;
+        $this->_position = strlen($this->_data);
+    }
+
+
+    /**
+     * Returns a long integer from the current position in the file
+     * and advances the file pointer.
+     *
+     * Eight bytes are consumed, most-significant byte first. When PHP
+     * integers are only 4 bytes wide, the value must fit into 31 bits;
+     * anything larger raises an exception.
+     *
+     * @return integer
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function readLong()
+    {
+        $str = substr($this->_data, $this->_position, 8);
+        $this->_position += 8;
+
+        /**
+         * Check, that we work in 64-bit mode.
+         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
+         */
+        if (PHP_INT_SIZE > 4) {
+            return  ord($str[0]) << 56  |
+                    ord($str[1]) << 48  |
+                    ord($str[2]) << 40  |
+                    ord($str[3]) << 32  |
+                    ord($str[4]) << 24  |
+                    ord($str[5]) << 16  |
+                    ord($str[6]) << 8   |
+                    ord($str[7]);
+        }
+
+        // 32-bit mode: the upper four bytes must be zero and the top bit of
+        // the lower word must be clear, otherwise the value would not fit
+        // into a positive 32-bit integer.
+        if ((ord($str[0])          != 0) ||
+            (ord($str[1])          != 0) ||
+            (ord($str[2])          != 0) ||
+            (ord($str[3])          != 0) ||
+            ((ord($str[4]) & 0x80) != 0)) {
+            throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+        }
+
+        return  ord($str[4]) << 24  |
+                ord($str[5]) << 16  |
+                ord($str[6]) << 8   |
+                ord($str[7]);
+    }
+
+    /**
+     * Appends $value to the end of the file as eight bytes,
+     * most-significant byte first, and moves the position to the new end.
+     *
+     * When PHP integers are only 4 bytes wide, values above 0x7FFFFFFF are
+     * rejected and the upper four bytes are written as zeros.
+     *
+     * @param integer $value
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function writeLong($value)
+    {
+        // Append-only: the current position is not consulted.
+
+        /**
+         * Check, that we work in 64-bit mode.
+         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
+         */
+        if (PHP_INT_SIZE > 4) {
+            settype($value, 'integer');
+
+            $encoded  = '';
+            $topShift = 56;
+        } else {
+            if ($value > 0x7FFFFFFF) {
+                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+            }
+
+            // Upper word is always zero in 32-bit mode
+            $encoded  = str_repeat(chr(0), 4);
+            $topShift = 24;
+        }
+
+        for ($shift = $topShift; $shift >= 0; $shift -= 8) {
+            $encoded .= chr(($value >> $shift) & 0xFF);
+        }
+
+        $this->_data    .= $encoded;
+        $this->_position = strlen($this->_data);
+    }
+
+
+
+    /**
+     * Decodes a variable-length integer starting at the current position
+     * and advances the position past it.
+     *
+     * Each byte contributes its low seven bits, least-significant group
+     * first; a set high bit means another byte follows.
+     *
+     * @return integer
+     */
+    public function readVInt()
+    {
+        $byte   = ord($this->_data[$this->_position++]);
+        $result = $byte & 0x7F;
+        $shift  = 7;
+
+        while (($byte & 0x80) != 0) {
+            $byte    = ord($this->_data[$this->_position++]);
+            $result |= ($byte & 0x7F) << $shift;
+            $shift  += 7;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Appends $value to the end of the file as a variable-length integer
+     * and moves the position to the new end.
+     *
+     * Seven bits are emitted per byte, least-significant group first;
+     * every byte except the last has its high bit set.
+     *
+     * @param integer $value
+     */
+    public function writeVInt($value)
+    {
+        // Append-only: the current position is not consulted.
+        settype($value, 'integer');
+
+        $encoded = '';
+        for (; $value > 0x7F; $value >>= 7) {
+            $encoded .= chr(($value & 0x7F) | 0x80);
+        }
+        $encoded .= chr($value);
+
+        $this->_data    .= $encoded;
+        $this->_position = strlen($this->_data);
+    }
+
+
+    /**
+     * Reads a string from the current position in the file
+     * and advances the file pointer.
+     *
+     * The string is prefixed by its length in characters (a VInt). The
+     * bytes are scanned so that every multi-byte lead byte pulls in its
+     * continuation bytes, and the two-byte sequence \xC0\x80 is decoded
+     * to a single \x00 byte.
+     *
+     * @return string
+     */
+    public function readString()
+    {
+        $strlen = $this->readVInt();
+        if ($strlen == 0) {
+            return '';
+        }
+
+        /**
+         * This implementation supports only Basic Multilingual Plane
+         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+         * "supplementary characters" (characters whose code points are
+         * greater than 0xFFFF)
+         * Java 2 represents these characters as a pair of char (16-bit)
+         * values, the first from the high-surrogates range (0xD800-0xDBFF),
+         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+         * they are encoded as usual UTF-8 characters in six bytes.
+         * Standard UTF-8 representation uses four bytes for supplementary
+         * characters.
+         */
+
+        // Start with one byte per character; $strlen grows as continuation
+        // bytes are discovered and always equals the expected byte length
+        // of $str_val.
+        $str_val = substr($this->_data, $this->_position, $strlen);
+        $this->_position += $strlen;
+
+        for ($count = 0; $count < $strlen; $count++ ) {
+            $leadByte = ord($str_val[$count]);
+            if (($leadByte & 0xC0) != 0xC0) {
+                // Single-byte character
+                continue;
+            }
+
+            $addBytes = 1;
+            if ($leadByte & 0x20) {
+                $addBytes++;
+
+                // Never used. Java2 doesn't encode strings in four bytes
+                if ($leadByte & 0x10) {
+                    $addBytes++;
+                }
+            }
+            $str_val .= substr($this->_data, $this->_position, $addBytes);
+            $this->_position += $addBytes;
+            $strlen          += $addBytes;
+
+            // Check for null character. Java2 encodes null character
+            // in two bytes.
+            if ($leadByte == 0xC0 && ord($str_val[$count+1]) == 0x80) {
+                // Collapse \xC0\x80 into a real NUL byte. The string is
+                // now one byte shorter, so the expected length shrinks
+                // too and the loop increment alone steps past the NUL.
+                $str_val = substr($str_val, 0, $count)
+                         . "\x00"
+                         . substr($str_val, $count+2);
+                $strlen--;
+                continue;
+            }
+
+            // Skip the continuation bytes of this character
+            $count += $addBytes;
+        }
+
+        return $str_val;
+    }
+
+    /**
+     * Writes a string to the end of file.
+     *
+     * The string is written as a VInt character count followed by its
+     * bytes, with every \x00 byte replaced by the two-byte sequence
+     * \xC0\x80.
+     *
+     * @param string $str
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function writeString($str)
+    {
+        /**
+         * This implementation supports only Basic Multilingual Plane
+         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+         * "supplementary characters" (characters whose code points are
+         * greater than 0xFFFF)
+         * Java 2 represents these characters as a pair of char (16-bit)
+         * values, the first from the high-surrogates range (0xD800-0xDBFF),
+         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+         * they are encoded as usual UTF-8 characters in six bytes.
+         * Standard UTF-8 representation uses four bytes for supplementary
+         * characters.
+         */
+
+        // We do not need to check if file position points to the end of "file".
+        // Only append operation is supported now
+
+        // convert input to a string before iterating string characters
+        settype($str, 'string');
+
+        $chars = $strlen = strlen($str);
+
+        // A NUL byte never starts a multi-byte sequence, so it is detected
+        // separately from the lead-byte scan below.
+        $containNullChars = (strpos($str, "\x00") !== false);
+
+        for ($count = 0; $count < $strlen; $count++ ) {
+            /**
+             * String is already in Java 2 representation.
+             * We should only calculate actual string length and replace
+             * \x00 by \xC0\x80
+             */
+            $leadByte = ord($str[$count]);
+            if (($leadByte & 0xC0) == 0xC0) {
+                $addBytes = 1;
+                if ($leadByte & 0x20) {
+                    $addBytes++;
+
+                    // Never used. Java2 doesn't encode strings in four bytes
+                    // and we don't support non-BMP characters
+                    if ($leadByte & 0x10) {
+                        $addBytes++;
+                    }
+                }
+                // Continuation bytes do not count as characters
+                $chars -= $addBytes;
+                $count += $addBytes;
+            }
+        }
+
+        if ($chars < 0) {
+            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
+        }
+
+        $this->writeVInt($chars);
+        if ($containNullChars) {
+            // str_replace() takes (search, replace, subject)
+            $this->_data .= str_replace("\x00", "\xC0\x80", $str);
+        } else {
+            $this->_data .= $str;
+        }
+
+        $this->_position = strlen($this->_data);
+    }
+
+
+    /**
+     * Reads a length-prefixed chunk of binary data: a VInt byte count
+     * followed by that many bytes. The position ends up just past the
+     * chunk.
+     *
+     * @return string
+     */
+    public function readBinary()
+    {
+        $size  = $this->readVInt();
+        $start = $this->_position;
+        $this->_position = $start + $size;
+
+        return substr($this->_data, $start, $size);
+    }
+}
+