]> git.mjollnir.org Git - moodle.git/commitdiff
Committing all changes reported in MDL-14646
authordiml <diml>
Fri, 2 May 2008 11:58:37 +0000 (11:58 +0000)
committerdiml <diml>
Fri, 2 May 2008 11:58:37 +0000 (11:58 +0000)
search/Zend/Search/Exception.php
search/Zend/Search/Lucene.php
search/Zend/Search/Lucene/Analysis/Analyzer.php
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php
search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php [new file with mode: 0644]
search/Zend/Search/Lucene/Document.php
search/Zend/Search/Lucene/Exception.php
search/Zend/Search/Lucene/Search/Query/Term.php
search/Zend/Search/Lucene/Search/QueryParser.php

index 291cc43ed5032d3ace0223be995f0ef8e92df412..63bdf565b2533c70fe34a9cee7dce799b87d81b4 100644 (file)
@@ -14,7 +14,7 @@
  *
  * @category   Zend
  * @package    Zend_Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 /**
  * Framework base exception
  */
-require_once $CFG->dirroot.'/search/Zend/Exception.php';
+require_once "Zend/Exception.php";
 
 
 /**
  * @category   Zend
  * @package    Zend_Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Exception extends Zend_Exception
index 1f15c9a0ba12239e247a3ac1cc48284ccb449115..0cd4d8a12c14189cc307239edf63fea06cf98f7e 100644 (file)
  *
  * @category   Zend
  * @package    Zend_Search_Lucene
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
-
 /** Zend_Search_Lucene_Exception */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+require_once "Zend/Search/Lucene/Exception.php";
 
 /** Zend_Search_Lucene_Document */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';
+require_once "Zend/Search/Lucene/Document.php";
 
 /** Zend_Search_Lucene_Document_Html */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
+require_once "Zend/Search/Lucene/Document/Html.php";
 
-/** Zend_Search_Lucene_Storage_Directory */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php';
+/** Zend_Search_Lucene_Storage_Directory_Filesystem */
+require_once "Zend/Search/Lucene/Storage/Directory/Filesystem.php";
 
 /** Zend_Search_Lucene_Storage_File_Memory */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php';
+require_once "Zend/Search/Lucene/Storage/File/Memory.php";
 
 /** Zend_Search_Lucene_Index_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+require_once "Zend/Search/Lucene/Index/Term.php";
 
 /** Zend_Search_Lucene_Index_TermInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php';
+require_once "Zend/Search/Lucene/Index/TermInfo.php";
 
 /** Zend_Search_Lucene_Index_SegmentInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
+require_once "Zend/Search/Lucene/Index/SegmentInfo.php";
 
 /** Zend_Search_Lucene_Index_FieldInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php';
+require_once "Zend/Search/Lucene/Index/FieldInfo.php";
 
 /** Zend_Search_Lucene_Index_Writer */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php';
+require_once "Zend/Search/Lucene/Index/Writer.php";
 
 /** Zend_Search_Lucene_Search_QueryParser */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
+require_once "Zend/Search/Lucene/Search/QueryParser.php";
 
 /** Zend_Search_Lucene_Search_QueryHit */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php';
+require_once "Zend/Search/Lucene/Search/QueryHit.php";
 
 /** Zend_Search_Lucene_Search_Similarity */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
+require_once "Zend/Search/Lucene/Search/Similarity.php";
 
 /** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
+require_once "Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php";
+
+/** Zend_Search_Lucene_LockManager */
+require_once "Zend/Search/Lucene/LockManager.php";
+
 
 
 /** Zend_Search_Lucene_Interface */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';
+require_once "Zend/Search/Lucene/Interface.php";
 
 /** Zend_Search_Lucene_Proxy */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php';
+require_once "Zend/Search/Lucene/Proxy.php";
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
@@ -86,6 +89,15 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
      */
     private static $_defaultSearchField = null;
 
+    /**
+     * Result set limit
+     *
+     * 0 means no limit
+     *
+     * @var integer
+     */
+    private static $_resultSetLimit = 0;
+
     /**
      * File system adapter.
      *
@@ -129,13 +141,6 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
     private $_hasChanges = false;
 
 
-    /**
-     * Index lock object
-     *
-     * @var Zend_Search_Lucene_Storage_File
-     */
-    private $_lock;
-
     /**
      * Signal, that index is already closed, changes are fixed and resources are cleaned up
      *
@@ -150,7 +155,14 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
      */
     private $_refCount = 0;
 
+    /**
+     * Current segment generation
+     *
+     * @var integer
+     */
+    private $_generation;
 
+    
     /**
      * Create index
      *
@@ -173,56 +185,101 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
         return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
     }
 
+    /** Generation retrieving counter */
+    const GENERATION_RETRIEVE_COUNT = 10;
+
+    /** Pause between generation retrieving attempts in milliseconds */
+    const GENERATION_RETRIEVE_PAUSE = 50;
+
     /**
-     * Opens the index.
+     * Get current generation number
      *
-     * IndexReader constructor needs Directory as a parameter. It should be
-     * a string with a path to the index folder or a Directory object.
+     * Returns generation number
+     * 0 means pre-2.1 index format
+     * -1 means there are no segments files.
      *
-     * @param mixed $directory
+     * @param Zend_Search_Lucene_Storage_Directory $directory
+     * @return integer
      * @throws Zend_Search_Lucene_Exception
      */
-    public function __construct($directory = null, $create = false)
+    public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
     {
-        if ($directory === null) {
-            throw new Zend_Search_Exception('No index directory specified');
-        }
-
-        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
-            $this->_directory      = $directory;
-            $this->_closeDirOnExit = false;
-        } else {
-            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
-            $this->_closeDirOnExit = true;
-        }
+        /**
+         * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
+         *
+         * Apache Lucene index format documentation mentions this method only as a fallback method
+         *
+         * Nevertheless we use it for performance reasons
+         *
+         * @todo check if we can use some modification of Apache Lucene generation determination algorithm
+         *       without performance problems
+         */
+
+        try {
+            for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
+                // Try to get generation file
+                $genFile = $directory->getFileObject('segments.gen', false);
+
+                $format = $genFile->readInt();
+                if ($format != (int)0xFFFFFFFE) {
+                    throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
+                }
 
+                $gen1 = $genFile->readLong();
+                $gen2 = $genFile->readLong();
 
-        // Get a shared lock to the index
-        $this->_lock = $this->_directory->createFile('index.lock');
+                if ($gen1 == $gen2) {
+                    return $gen1;
+                }
 
-        $this->_segmentInfos = array();
+                usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
+            }
 
-        if ($create) {
-            // Throw an exception if index is under processing now
-            if (!$this->_lock->lock(LOCK_EX, true)) {
-                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
+            // All attempts failed
+            throw new Zend_Search_Lucene_Exception('Index is under processing now');
+        } catch (Zend_Search_Lucene_Exception $e) {
+            if (strpos($e->getMessage(), 'is not readable') !== false) {
+                try {
+                    // Try to open old style segments file
+                    $segmentsFile = $directory->getFileObject('segments', false);
+
+                    // It's pre-2.1 index
+                    return 0;
+                } catch (Zend_Search_Lucene_Exception $e) {
+                    if (strpos($e->getMessage(), 'is not readable') !== false) {
+                        return -1;
+                    } else {
+                        throw $e;
+                    }
+                }
+            } else {
+                throw $e;
             }
+        }
 
-            // Writer will create segments file for empty segments list
-            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);
+        return -1;
+    }
 
-            if (!$this->_lock->lock(LOCK_SH)) {
-                throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
-            }
-        } else {
-            // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
-            if (!$this->_lock->lock(LOCK_SH)) {
-                throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
-            }
-            $this->_writer = null;
+    /**
+     * Get segments file name
+     *
+     * @param integer $generation
+     * @return string
+     */
+    public static function getSegmentFileName($generation)
+    {
+        if ($generation == 0) {
+            return 'segments';
         }
 
+        return 'segments_' . base_convert($generation, 10, 36);
+    }
 
+    /**
+     * Read segments file for pre-2.1 Lucene index format
+     */
+    private function _readPre21SegmentsFile()
+    {
         $segmentsFile = $this->_directory->getFileObject('segments');
 
         $format = $segmentsFile->readInt();
@@ -248,13 +305,156 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
             $segSize = $segmentsFile->readInt();
             $this->_docCount += $segSize;
 
-            $this->_segmentInfos[] =
-                                new Zend_Search_Lucene_Index_SegmentInfo($segName,
+            $this->_segmentInfos[$segName] =
+                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+                                                                         $segName,
+                                                                         $segSize);
+        }
+    }
+
+    /**
+     * Read segments file
+     *
+     * @throws Zend_Search_Lucene_Exception
+     */
+    private function _readSegmentsFile()
+    {
+        $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+
+        $format = $segmentsFile->readInt();
+
+        if ($format != (int)0xFFFFFFFD) {
+            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+        }
+
+        // read version
+        // $segmentsFile->readLong();
+        $segmentsFile->readInt(); $segmentsFile->readInt();
+
+        // read segment name counter
+        $segmentsFile->readInt();
+
+        $segments = $segmentsFile->readInt();
+
+        $this->_docCount = 0;
+
+        // read segmentInfos
+        for ($count = 0; $count < $segments; $count++) {
+            $segName = $segmentsFile->readString();
+            $segSize = $segmentsFile->readInt();
+
+            // 2.1+ specific properties
+            //$delGen          = $segmentsFile->readLong();
+            $delGenHigh        = $segmentsFile->readInt();
+            $delGenLow         = $segmentsFile->readInt();
+            if ($delGenHigh == (int)0xFFFFFFFF  && $delGenLow == (int)0xFFFFFFFF) {
+                $delGen = -1; // There are no deletes
+            } else {
+                $delGen = ($delGenHigh << 32) | $delGenLow;
+            }
+
+            $hasSingleNormFile = $segmentsFile->readByte();
+            $numField          = $segmentsFile->readInt();
+
+            $normGens = array();
+            if ($numField != (int)0xFFFFFFFF) {
+                for ($count1 = 0; $count1 < $numField; $count1++) {
+                    $normGens[] = $segmentsFile->readLong();
+                }
+
+                throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
+            }
+
+            $isCompound        = $segmentsFile->readByte();
+
+
+            $this->_docCount += $segSize;
+
+            $this->_segmentInfos[$segName] =
+                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+                                                                         $segName,
                                                                          $segSize,
-                                                                         $this->_directory);
+                                                                         $delGen,
+                                                                         $hasSingleNormFile,
+                                                                         $isCompound);
         }
     }
 
+    /**
+     * Opens the index.
+     *
+     * IndexReader constructor needs Directory as a parameter. It should be
+     * a string with a path to the index folder or a Directory object.
+     *
+     * @param mixed $directory
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function __construct($directory = null, $create = false)
+    {
+        if ($directory === null) {
+            throw new Zend_Search_Exception('No index directory specified');
+        }
+
+        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
+            $this->_directory      = $directory;
+            $this->_closeDirOnExit = false;
+        } else {
+            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
+            $this->_closeDirOnExit = true;
+        }
+
+        $this->_segmentInfos = array();
+
+        // Mark index as "under processing" to prevent other processes from premature index cleaning
+        Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
+        
+        // Escalate read lock to prevent current generation index files from being deleted while the opening process is not done
+//        Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);
+        
+        
+        $this->_generation = self::getActualGeneration($this->_directory);
+        
+        if ($create) {
+               try {
+                       Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
+               } catch (Zend_Search_Lucene_Exception $e) {
+                       if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
+                               throw $e;
+                       } else {
+                               throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
+                       }
+               }
+
+            if ($this->_generation == -1) {
+                // Directory doesn't contain existing index, start from 1
+                $this->_generation = 1;
+                $nameCounter = 0;
+            } else {
+                // Directory contains existing index
+                $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+                $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
+
+                $nameCounter = $segmentsFile->readInt();
+                $this->_generation++;
+            }
+
+            Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
+
+            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
+        }
+
+        if ($this->_generation == -1) {
+            throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
+        } else if ($this->_generation == 0) {
+            $this->_readPre21SegmentsFile();
+        } else {
+            $this->_readSegmentsFile();
+        }
+        
+        // De-escalate read lock once the opening process is done (NOTE: the disabled call below still names escalateReadLock - confirm)
+//        Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory);
+    }
+
     /**
      * Close current index and free resources
      */
@@ -267,9 +467,9 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
 
         $this->commit();
 
-        // Free shared lock
-        $this->_lock->unlock();
-
+        // Release "under processing" flag
+        Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
+                
         if ($this->_closeDirOnExit) {
             $this->_directory->close();
         }
@@ -431,6 +631,30 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
         return self::$_defaultSearchField;
     }
 
+    /**
+     * Set result set limit.
+     *
+     * 0 (default) means no limit
+     *
+     * @param integer $limit
+     */
+    public static function setResultSetLimit($limit)
+    {
+        self::$_resultSetLimit = $limit;
+    }
+
+    /**
+     * Get result set limit.
+     *
+     * 0 means no limit
+     *
+     * @return integer
+     */
+    public static function getResultSetLimit()
+    {
+        return self::$_resultSetLimit;
+    }
+
     /**
      * Retrieve index maxBufferedDocs option
      *
@@ -585,11 +809,15 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
                     $topScore = $docScore;
                 }
             }
+
+            if (self::$_resultSetLimit != 0  &&  count($hits) >= self::$_resultSetLimit) {
+                break;
+            }
         }
 
         if (count($hits) == 0) {
             // skip sorting, which may cause a error on empty index
-               return array();
+            return array();
         }
 
         if ($topScore > 1) {
@@ -977,6 +1205,8 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
     {
         $this->getIndexWriter()->addDocument($document);
         $this->_docCount++;
+        
+        $this->_hasChanges = true;
     }
 
 
@@ -1002,14 +1232,12 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
             foreach ($this->_segmentInfos as $segInfo) {
                 $segInfo->writeChanges();
             }
-
-            $this->_hasChanges = false;
-        }
-
-        if ($this->_writer !== null) {
-            $this->_writer->commit();
-
+            
+            $this->getIndexWriter()->commit();
+            
             $this->_updateDocCount();
+            
+            $this->_hasChanges = false;
         }
     }
 
@@ -1059,9 +1287,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
                 $result[] = $segmentInfo->currentTerm();
             }
 
-            $segmentInfo->nextTerm();
-            // check, if segment dictionary is finished
-            if ($segmentInfo->currentTerm() !== null) {
+            if ($segmentInfo->nextTerm() !== null) {
                 // Put segment back into the priority queue
                 $segmentInfoQueue->put($segmentInfo);
             }
@@ -1071,6 +1297,125 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
     }
 
 
+    /**
+     * Terms stream queue
+     *
+     * @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue
+     */
+    private $_termsStreamQueue = null;
+
+    /**
+     * Last Term in a terms stream
+     *
+     * @var Zend_Search_Lucene_Index_Term
+     */
+    private $_lastTerm = null;
+
+    /**
+     * Reset terms stream.
+     */
+    public function resetTermsStream()
+    {
+        $this->_termsStreamQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
+
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $segmentInfo->reset();
+
+            // Skip "empty" segments
+            if ($segmentInfo->currentTerm() !== null) {
+                $this->_termsStreamQueue->put($segmentInfo);
+            }
+        }
+
+        $this->nextTerm();
+    }
+
+    /**
+     * Skip terms stream up to the specified term prefix.
+     *
+     * Prefix contains fully specified field info and portion of searched term
+     *
+     * @param Zend_Search_Lucene_Index_Term $prefix
+     */
+    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
+    {
+        $segments = array();
+
+        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
+            $segments[] = $segmentInfo;
+        }
+
+        foreach ($segments as $segmentInfo) {
+            $segmentInfo->skipTo($prefix);
+
+            if ($segmentInfo->currentTerm() !== null) {
+                $this->_termsStreamQueue->put($segmentInfo);
+            }
+        }
+
+        $this->nextTerm();
+    }
+
+    /**
+     * Scans terms dictionary and returns next term
+     *
+     * @return Zend_Search_Lucene_Index_Term|null
+     */
+    public function nextTerm()
+    {
+        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
+            if ($this->_termsStreamQueue->top() === null ||
+                $this->_termsStreamQueue->top()->currentTerm()->key() !=
+                            $segmentInfo->currentTerm()->key()) {
+                // We got new term
+                $this->_lastTerm = $segmentInfo->currentTerm();
+
+                if ($segmentInfo->nextTerm() !== null) {
+                    // Put segment back into the priority queue
+                    $this->_termsStreamQueue->put($segmentInfo);
+                }
+
+                return $this->_lastTerm;
+            }
+
+            if ($segmentInfo->nextTerm() !== null) {
+                // Put segment back into the priority queue
+                $this->_termsStreamQueue->put($segmentInfo);
+            }
+        }
+
+        // End of stream
+        $this->_lastTerm = null;
+
+        return null;
+    }
+
+    /**
+     * Returns term in current position
+     *
+     * @return Zend_Search_Lucene_Index_Term|null
+     */
+    public function currentTerm()
+    {
+        return $this->_lastTerm;
+    }
+
+    /**
+     * Close terms stream
+     *
+     * Should be used for resources clean up if stream is not read up to the end
+     */
+    public function closeTermsStream()
+    {
+        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
+            $segmentInfo->closeTermsStream();
+        }
+
+        $this->_termsStreamQueue = null;
+        $this->_lastTerm         = null;
+    }
+
+
     /*************************************************************************
     @todo UNIMPLEMENTED
     *************************************************************************/
index 7a29c763d90e54e3bc7c8cade532bdae8623fe9a..def78ac5906e79da8a02377b553d8a7f5bba893e 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Analysis_Token */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Token.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php";
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php";
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php";
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php";
 
 /** Zend_Search_Lucene_Analysis_TokenFilter_StopWords */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
 
 /** Zend_Search_Lucene_Analysis_TokenFilter_ShortWords */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php";
 
 
 /**
@@ -61,7 +67,7 @@ require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/Shor
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
@@ -97,9 +103,10 @@ abstract class Zend_Search_Lucene_Analysis_Analyzer
      * @param string $data
      * @return array
      */
-    public function tokenize($data, $encoding = 'UTF-8')
+    public function tokenize($data, $encoding = '')
     {
         $this->setInput($data, $encoding);
+
         $tokenList = array();
         while (($nextToken = $this->nextToken()) !== null) {
             $tokenList[] = $nextToken;
@@ -160,7 +167,7 @@ abstract class Zend_Search_Lucene_Analysis_Analyzer
     public static function getDefault()
     {
         if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
-            self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
+            self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
         }
 
         return self::$_defaultImpl;
index d084ebc4c5f913b8d205b9ea8bb83d4f758a55cd..46bf196e12b8ef2bb59eefdf01e4a19c31ae784d 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
@@ -53,7 +53,9 @@ class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucen
         }
 
         // convert input into ascii
-        $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
+        //$this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
+        $this->_input = mb_convert_encoding($this->_input, 'ASCII', 'auto');
+        
         $this->_encoding = 'ASCII';
     }
 
@@ -75,7 +77,7 @@ class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucen
             if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
                 // It covers both cases a) there are no matches (preg_match(...) === 0)
                 // b) error occured (preg_match(...) === FALSE)
-               return null;
+                return null;
             }
 
             $str = $match[0][0];
index 674a3d9e643871a6b263b3e367897dde21219f1c..768e7847c6f18d47bc664e67b4427a6b7720ac9d 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Analysis_Analyzer_Common */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Analysis
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
@@ -47,13 +47,20 @@ class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 extends Zend_Search_Lucen
      * @var integer
      */
     private $_bytePosition;
-
+    
     /**
-     * Stream length
+     * Object constructor
      *
-     * @var integer
+     * @throws Zend_Search_Lucene_Exception
      */
-    private $_streamLength;
+    public function __construct()
+    {
+        if (@preg_match('/\pL/u', 'a') != 1) {
+            // PCRE unicode support is turned off
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Utf8 analyzer needs PCRE unicode support to be enabled.');
+        }
+    }
 
     /**
      * Reset token stream
@@ -66,56 +73,9 @@ class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 extends Zend_Search_Lucen
         // convert input into UTF-8
         if (strcasecmp($this->_encoding, 'utf8' ) != 0  &&
             strcasecmp($this->_encoding, 'utf-8') != 0 ) {
-                $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input);
+                $this->_input = @iconv($this->_encoding, 'UTF-8', $this->_input);
                 $this->_encoding = 'UTF-8';
         }
-
-        // Get UTF-8 string length.
-        // It also checks if it's a correct utf-8 string
-        $this->_streamLength = iconv_strlen($this->_input, 'UTF-8');
-    }
-
-    /**
-     * Check, that character is a letter
-     *
-     * @param string $char
-     * @return boolean
-     */
-    private static function _isAlpha($char)
-    {
-        if (strlen($char) > 1) {
-            // It's an UTF-8 character
-            return true;
-        }
-
-        return ctype_alpha($char);
-    }
-
-    /**
-     * Get next UTF-8 char
-     *
-     * @param string $char
-     * @return boolean
-     */
-    private function _nextChar()
-    {
-        $char = $this->_input[$this->_bytePosition++];
-
-        if (( ord($char) & 0xC0 ) == 0xC0) {
-            $addBytes = 1;
-            if (ord($char) & 0x20 ) {
-                $addBytes++;
-                if (ord($char) & 0x10 ) {
-                    $addBytes++;
-                }
-            }
-            $char .= substr($this->_input, $this->_bytePosition, $addBytes);
-            $this->_bytePosition += $addBytes;
-        }
-
-        $this->_position++;
-
-        return $char;
     }
 
     /**
@@ -131,39 +91,35 @@ class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 extends Zend_Search_Lucen
             return null;
         }
 
-        while ($this->_position < $this->_streamLength) {
-            // skip white space
-            while ($this->_position < $this->_streamLength &&
-                   !self::_isAlpha($char = $this->_nextChar())) {
-                $char = '';
-            }
-
-            $termStartPosition = $this->_position - 1;
-            $termText = $char;
-
-            // read token
-            while ($this->_position < $this->_streamLength &&
-                   self::_isAlpha($char = $this->_nextChar())) {
-                $termText .= $char;
-            }
-
-            // Empty token, end of stream.
-            if ($termText == '') {
+        do {
+            if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
+                // It covers both cases a) there are no matches (preg_match(...) === 0)
+                // b) error occurred (preg_match(...) === FALSE)
                 return null;
             }
 
-            $token = new Zend_Search_Lucene_Analysis_Token(
-                                      $termText,
-                                      $termStartPosition,
-                                      $this->_position - 1);
-            $token = $this->normalize($token);
-            if ($token !== null) {
-                return $token;
-            }
-            // Continue if token is skipped
-        }
-
-        return null;
+            // matched string
+            $matchedWord = $match[0][0];
+            
+            // binary position of the matched word in the input stream
+            $binStartPos = $match[0][1];
+            
+            // character position of the matched word in the input stream
+            $startPos = $this->_position + 
+                        iconv_strlen(substr($this->_input,
+                                            $this->_bytePosition,
+                                            $binStartPos - $this->_bytePosition),
+                                     'UTF-8');
+            // character position of the end of the matched word in the input stream
+            $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
+
+            $this->_bytePosition = $binStartPos + strlen($matchedWord);
+            $this->_position     = $endPos;
+
+            $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
+        } while ($token === null); // try again if token is skipped
+
+        return $token;
     }
 }
 
diff --git a/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php b/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php
new file mode 100644 (file)
index 0000000..78cb5e6
--- /dev/null
@@ -0,0 +1,70 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_TokenFilter */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter.php";
+
+
+/**
+ * UTF-8 aware lower case Token filter (uses mb_strtolower).
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+class Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 extends Zend_Search_Lucene_Analysis_TokenFilter
+{
+    /**
+     * Object constructor
+     */
+    public function __construct()
+    {
+        global $CFG;
+        if (!function_exists('mb_strtolower')) {
+            // mbstring extension is disabled
+            require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php";
+            throw new Zend_Search_Lucene_Exception('Utf8 compatible lower case filter needs mbstring extension to be enabled.');
+        }
+    }
+    
+    /**
+     * Normalize Token or remove it (if null is returned)
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $srcToken
+     * @return Zend_Search_Lucene_Analysis_Token
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
+    {
+        $newToken = new Zend_Search_Lucene_Analysis_Token(
+                                     mb_strtolower($srcToken->getTermText(), 'UTF-8'),
+                                     $srcToken->getStartOffset(),
+                                     $srcToken->getEndOffset());
+
+        $newToken->setPositionIncrement($srcToken->getPositionIncrement());
+
+        return $newToken;
+    }
+}
+
index 6309719568d44a6be6b1381a734931e84db0256a..de4281efa7c8b5f02d11ef7abd0886f71952ab8f 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Document
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Field */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Field.php';
+require_once "Zend/Search/Lucene/Field.php";
 
 
 /**
@@ -30,7 +30,7 @@ require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Field.php';
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Document
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Document
@@ -54,10 +54,10 @@ class Zend_Search_Lucene_Document
      * @param  $offset
      * @return string
      */
-       public function __get($offset)
-       {
-               return $this->getFieldValue($offset);
-       }
+    public function __get($offset)
+    {
+        return $this->getFieldValue($offset);
+    }
 
 
     /**
@@ -78,7 +78,7 @@ class Zend_Search_Lucene_Document
      */
     public function getFieldNames()
     {
-       return array_keys($this->_fields);
+        return array_keys($this->_fields);
     }
 
 
@@ -105,7 +105,7 @@ class Zend_Search_Lucene_Document
      */
     public function getFieldValue($fieldName)
     {
-       return $this->getField($fieldName)->value;
+        return $this->getField($fieldName)->value;
     }
 
     /**
@@ -116,6 +116,6 @@ class Zend_Search_Lucene_Document
      */
     public function getFieldUtf8Value($fieldName)
     {
-       return $this->getField($fieldName)->getUtf8Value();
+        return $this->getField($fieldName)->getUtf8Value();
     }
 }
index 9d06e89522a61f8235c5c4eef97e9a3d5f996602..d08b30dbb1de990edb3ab841064ed95a00333e58 100644 (file)
@@ -14,7 +14,7 @@
  *
  * @category   Zend
  * @package    Zend_Search_Lucene
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 /**
  * Framework base exception
  */
-require_once $CFG->dirroot.'/search/Zend/Search/Exception.php';
+require_once "Zend/Search/Exception.php";
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Exception extends Zend_Search_Exception
index 0240104e3258dbadc402d67c47a2a6bdf0ccdf82..f9aa071b8ecfd7bed30e857ad259dc1556c11b28 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Search_Query */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query.php";
 
 /** Zend_Search_Lucene_Search_Weight_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Weight/Term.php";
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
@@ -65,7 +65,7 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
      * @param Zend_Search_Lucene_Index_Term $term
      * @param boolean $sign
      */
-    public function __construct($term)
+    public function __construct(Zend_Search_Lucene_Index_Term $term)
     {
         $this->_term = $term;
     }
index 1a3d5712de820f444c47178013910078765980af..b1092a5af6560d558702d33c08aaf48aa4d36f7e 100644 (file)
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 
 
 /** Zend_Search_Lucene_Index_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Index/Term.php";
 
 /** Zend_Search_Lucene_Search_Query_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Term.php";
 
 /** Zend_Search_Lucene_Search_Query_MultiTerm */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/MultiTerm.php";
 
 /** Zend_Search_Lucene_Search_Query_Boolean */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Boolean.php";
 
 /** Zend_Search_Lucene_Search_Query_Phrase */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Phrase.php";
+
+/** Zend_Search_Lucene_Search_Query_Wildcard */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Wildcard.php";
+
+/** Zend_Search_Lucene_Search_Query_Range */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Range.php";
+
+/** Zend_Search_Lucene_Search_Query_Fuzzy */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Fuzzy.php";
 
 /** Zend_Search_Lucene_Search_Query_Empty */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Empty.php";
+
+/** Zend_Search_Lucene_Search_Query_Insignificant */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Insignificant.php";
 
 
 /** Zend_Search_Lucene_Search_QueryLexer */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryLexer.php";
 
 /** Zend_Search_Lucene_Search_QueryParserContext */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryParserContext.php";
 
 
 /** Zend_Search_Lucene_FSM */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/FSM.php";
 
 /** Zend_Search_Lucene_Exception */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php";
 
 /** Zend_Search_Lucene_Search_QueryParserException */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryParserException.php";
 
 
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
  * @subpackage Search
- * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  */
 class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
@@ -139,6 +151,26 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     private $_defaultEncoding = '';
 
+    /**
+     * Defines query parsing mode.
+     * 
+     * If this option is turned on, the query parser suppresses query parser exceptions
+     * and constructs a multi-term query using all words from the query.
+     * 
+     * That helps to avoid exceptions caused by queries that don't conform to the query language,
+     * but limits the ability to detect that a query entered by the user has inconsistencies.
+     * 
+     * 
+     * Default is true.
+     * 
+     * Use {@link Zend_Search_Lucene_Search_QueryParser::suppressQueryParsingExceptions()},
+     * {@link Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions()} and
+     * {@link Zend_Search_Lucene_Search_QueryParser::queryParsingExceptionsSuppressed()} to operate
+     * with this setting.
+     * 
+     * @var boolean
+     */
+    private $_suppressQueryParsingExceptions = true;
 
     /**
      * Boolean operators constants
@@ -256,6 +288,18 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
         $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
     }
 
+    /**
+     * Get query parser instance
+     * 
+     * @return Zend_Search_Lucene_Search_QueryParser
+     */
+    private static function _getInstance()
+    {
+        if (self::$_instance === null) {
+            self::$_instance = new self();
+        }
+        return self::$_instance;
+    }
 
     /**
      * Set query string default encoding
@@ -264,11 +308,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public static function setDefaultEncoding($encoding)
     {
-        if (self::$_instance === null) {
-            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
-        }
-
-        self::$_instance->_defaultEncoding = $encoding;
+        self::_getInstance()->_defaultEncoding = $encoding;
     }
 
     /**
@@ -278,11 +318,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public static function getDefaultEncoding()
     {
-        if (self::$_instance === null) {
-            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
-        }
-
-        return self::$_instance->_defaultEncoding;
+       return self::_getInstance()->_defaultEncoding;
     }
 
     /**
@@ -292,11 +328,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public static function setDefaultOperator($operator)
     {
-        if (self::$_instance === null) {
-            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
-        }
-
-        self::$_instance->_defaultOperator = $operator;
+        self::_getInstance()->_defaultOperator = $operator;
     }
 
     /**
@@ -306,13 +338,34 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public static function getDefaultOperator()
     {
-        if (self::$_instance === null) {
-            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
-        }
+        return self::_getInstance()->_defaultOperator;
+    }
 
-        return self::$_instance->_defaultOperator;
+    /**
+     * Turn on 'suppress query parser exceptions' mode.
+     */
+    public static function suppressQueryParsingExceptions()
+    {
+        self::_getInstance()->_suppressQueryParsingExceptions = true;
+    }
+    /**
+     * Turn off 'suppress query parser exceptions' mode.
+     */
+    public static function dontSuppressQueryParsingExceptions()
+    {
+        self::_getInstance()->_suppressQueryParsingExceptions = false;
     }
+    /**
+     * Check 'suppress query parser exceptions' mode.
+     * @return boolean
+     */
+    public static function queryParsingExceptionsSuppressed()
+    {
+        return self::_getInstance()->_suppressQueryParsingExceptions;
+    }
+    
 
+    
     /**
      * Parses a query string
      *
@@ -323,42 +376,62 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public static function parse($strQuery, $encoding = null)
     {
-        if (self::$_instance === null) {
-            self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
-        }
-
-        self::$_instance->_encoding     = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
-        self::$_instance->_lastToken    = null;
-        self::$_instance->_context      = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
-        self::$_instance->_contextStack = array();
-        self::$_instance->_tokens       = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
-
-        // Empty query
-        if (count(self::$_instance->_tokens) == 0) {
-            return new Zend_Search_Lucene_Search_Query_Empty();
-        }
-
-
-        foreach (self::$_instance->_tokens as $token) {
-            try {
-                self::$_instance->_currentToken = $token;
-                self::$_instance->process($token->type);
-
-                self::$_instance->_lastToken = $token;
-            } catch (Exception $e) {
-                if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
-                    throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+        self::_getInstance();
+        
+        // Reset FSM if previous parse operation didn't return it into a correct state 
+        self::$_instance->reset();
+        
+        try {
+            self::$_instance->_encoding     = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
+            self::$_instance->_lastToken    = null;
+            self::$_instance->_context      = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
+            self::$_instance->_contextStack = array();
+            self::$_instance->_tokens       = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
+    
+            // Empty query
+            if (count(self::$_instance->_tokens) == 0) {
+                return new Zend_Search_Lucene_Search_Query_Insignificant();
+            }
+    
+    
+            foreach (self::$_instance->_tokens as $token) {
+                try {
+                    self::$_instance->_currentToken = $token;
+                    self::$_instance->process($token->type);
+    
+                    self::$_instance->_lastToken = $token;
+                } catch (Exception $e) {
+                    if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
+                        throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+                    }
+    
+                    throw $e;
                 }
-
+            }
+    
+            if (count(self::$_instance->_contextStack) != 0) {
+                throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
+            }
+    
+            return self::$_instance->_context->getQuery();
+        } catch (Zend_Search_Lucene_Search_QueryParserException $e) {
+            if (self::$_instance->_suppressQueryParsingExceptions) {
+                $queryTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($strQuery, self::$_instance->_encoding);
+
+                $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
+                $termsSign = (self::$_instance->_defaultOperator == self::B_AND) ? true /* required term */ :
+                                                                                   null /* optional term */;
+                                                                                   
+                foreach ($queryTokens as $token) {
+                    $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()), $termsSign);
+                }
+                
+                
+                return $query;
+            } else {
                 throw $e;
             }
         }
-
-        if (count(self::$_instance->_contextStack) != 0) {
-            throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
-        }
-
-        return self::$_instance->_context->getQuery();
     }
 
 
@@ -433,7 +506,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
 
             default:
                 // It's not a user input exception
-                throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position .' );
+                throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
         }
     }
 
@@ -485,13 +558,31 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public function openedRQLastTerm()
     {
-        throw new Zend_Search_Lucene_Search_QueryParserException('Range queries are not supported yet.');
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
+        if (count($tokens) > 1) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+        } else if (count($tokens) == 1) {
+            $from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+        } else {
+            $from = null;
+        }
 
-        // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
-        // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
+        if (count($tokens) > 1) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+        } else if (count($tokens) == 1) {
+            $to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+        } else {
+            $to = null;
+        }
 
-        // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, false);
-        // $this->_context->addentry($query);
+        if ($from === null  &&  $to === null) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
+        }
+
+        $rangeQuery = new Zend_Search_Lucene_Search_Query_Range($from, $to, false);
+        $entry      = new Zend_Search_Lucene_Search_QueryEntry_Subquery($rangeQuery);
+        $this->_context->addEntry($entry);
     }
 
     /**
@@ -509,13 +600,31 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
      */
     public function closedRQLastTerm()
     {
-        throw new Zend_Search_Lucene_Search_QueryParserException('Range queries are not supported yet.');
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
+        if (count($tokens) > 1) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+        } else if (count($tokens) == 1) {
+            $from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+        } else {
+            $from = null;
+        }
 
-        // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
-        // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
+        if (count($tokens) > 1) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+        } else if (count($tokens) == 1) {
+            $to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+        } else {
+            $to = null;
+        }
 
-        // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, true);
-        // $this->_context->addentry($query);
+        if ($from === null  &&  $to === null) {
+            throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
+        }
+
+        $rangeQuery = new Zend_Search_Lucene_Search_Query_Range($from, $to, true);
+        $entry      = new Zend_Search_Lucene_Search_QueryEntry_Subquery($rangeQuery);
+        $this->_context->addEntry($entry);
     }
 }