*
* @category Zend
* @package Zend_Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Framework base exception
*/
-require_once $CFG->dirroot.'/search/Zend/Exception.php';
+require_once "Zend/Exception.php";
/**
* @category Zend
* @package Zend_Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Exception extends Zend_Exception
*
* @category Zend
* @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
-
/** Zend_Search_Lucene_Exception */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+require_once "Zend/Search/Lucene/Exception.php";
/** Zend_Search_Lucene_Document */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document.php';
+require_once "Zend/Search/Lucene/Document.php";
/** Zend_Search_Lucene_Document_Html */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Document/Html.php';
+require_once "Zend/Search/Lucene/Document/Html.php";
-/** Zend_Search_Lucene_Storage_Directory */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php';
+/** Zend_Search_Lucene_Storage_Directory_Filesystem */
+require_once "Zend/Search/Lucene/Storage/Directory/Filesystem.php";
/** Zend_Search_Lucene_Storage_File_Memory */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Storage/File/Memory.php';
+require_once "Zend/Search/Lucene/Storage/File/Memory.php";
/** Zend_Search_Lucene_Index_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+require_once "Zend/Search/Lucene/Index/Term.php";
/** Zend_Search_Lucene_Index_TermInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/TermInfo.php';
+require_once "Zend/Search/Lucene/Index/TermInfo.php";
/** Zend_Search_Lucene_Index_SegmentInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
+require_once "Zend/Search/Lucene/Index/SegmentInfo.php";
/** Zend_Search_Lucene_Index_FieldInfo */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/FieldInfo.php';
+require_once "Zend/Search/Lucene/Index/FieldInfo.php";
/** Zend_Search_Lucene_Index_Writer */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Writer.php';
+require_once "Zend/Search/Lucene/Index/Writer.php";
/** Zend_Search_Lucene_Search_QueryParser */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParser.php';
+require_once "Zend/Search/Lucene/Search/QueryParser.php";
/** Zend_Search_Lucene_Search_QueryHit */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryHit.php';
+require_once "Zend/Search/Lucene/Search/QueryHit.php";
/** Zend_Search_Lucene_Search_Similarity */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Similarity.php';
+require_once "Zend/Search/Lucene/Search/Similarity.php";
/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
+require_once "Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php";
+
+/** Zend_Search_Lucene_LockManager */
+require_once "Zend/Search/Lucene/LockManager.php";
+
/** Zend_Search_Lucene_Interface */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Interface.php';
+require_once "Zend/Search/Lucene/Interface.php";
/** Zend_Search_Lucene_Proxy */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Proxy.php';
+require_once "Zend/Search/Lucene/Proxy.php";
/**
* @category Zend
* @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
private static $_defaultSearchField = null;
+ /**
+ * Result set limit
+ *
+ * 0 means no limit
+ *
+ * @var integer
+ */
+ private static $_resultSetLimit = 0;
+
/**
* File system adapter.
*
private $_hasChanges = false;
- /**
- * Index lock object
- *
- * @var Zend_Search_Lucene_Storage_File
- */
- private $_lock;
-
/**
* Signal, that index is already closed, changes are fixed and resources are cleaned up
*
*/
private $_refCount = 0;
+ /**
+ * Current segment generation
+ *
+ * @var integer
+ */
+ private $_generation;
+
/**
* Create index
*
return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
}
+ /** Generation retrieving counter */
+ const GENERATION_RETRIEVE_COUNT = 10;
+
+ /** Pause between generation retrieving attempts in milliseconds */
+ const GENERATION_RETRIEVE_PAUSE = 50;
+
/**
- * Opens the index.
+ * Get current generation number
*
- * IndexReader constructor needs Directory as a parameter. It should be
- * a string with a path to the index folder or a Directory object.
+ * Returns generation number
+ * 0 means pre-2.1 index format
+ * -1 means there are no segments files.
*
- * @param mixed $directory
+ * @param Zend_Search_Lucene_Storage_Directory $directory
+ * @return integer
* @throws Zend_Search_Lucene_Exception
*/
- public function __construct($directory = null, $create = false)
+ public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
- if ($directory === null) {
- throw new Zend_Search_Exception('No index directory specified');
- }
-
- if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
- $this->_directory = $directory;
- $this->_closeDirOnExit = false;
- } else {
- $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
- $this->_closeDirOnExit = true;
- }
+ /**
+ * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
+ *
+ * Apache Lucene index format documentation mentions this method only as a fallback method
+ *
+ * Nevertheless we use it according to the performance considerations
+ *
+ * @todo check if we can use some modification of Apache Lucene generation determination algorithm
+ * without performance problems
+ */
+
+ try {
+ // Make up to GENERATION_RETRIEVE_COUNT attempts to read a consistent generation number
+ for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
+ // Try to get generation file
+ $genFile = $directory->getFileObject('segments.gen', false);
+
+ // The file must start with the expected format marker
+ $format = $genFile->readInt();
+ if ($format != (int)0xFFFFFFFE) {
+ throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
+ }
+ // Two long values follow the marker; the generation is accepted only when both are equal
+ $gen1 = $genFile->readLong();
+ $gen2 = $genFile->readLong();
- // Get a shared lock to the index
- $this->_lock = $this->_directory->createFile('index.lock');
+ if ($gen1 == $gen2) {
+ return $gen1;
+ }
- $this->_segmentInfos = array();
+ // The two values differ: pause before the next attempt
+ // (GENERATION_RETRIEVE_PAUSE is in milliseconds, usleep() takes microseconds)
+ usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
+ }
- if ($create) {
- // Throw an exception if index is under processing now
- if (!$this->_lock->lock(LOCK_EX, true)) {
- throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
+ // All passes are failed
+ throw new Zend_Search_Lucene_Exception('Index is under processing now');
+ } catch (Zend_Search_Lucene_Exception $e) {
+ // This handler also receives the two exceptions thrown above; they do not contain
+ // 'is not readable' and are therefore re-thrown unchanged by the else branch below.
+ // NOTE(review): 'is not readable' is presumably the message getFileObject() produces
+ // for a missing file - confirm against the storage classes.
+ if (strpos($e->getMessage(), 'is not readable') !== false) {
+ try {
+ // Try to open old style segments file
+ $segmentsFile = $directory->getFileObject('segments', false);
+
+ // It's pre-2.1 index
+ return 0;
+ } catch (Zend_Search_Lucene_Exception $e) {
+ // Neither segments.gen nor segments could be opened: report "no segments files"
+ if (strpos($e->getMessage(), 'is not readable') !== false) {
+ return -1;
+ } else {
+ throw $e;
+ }
+ }
+ } else {
+ throw $e;
}
+ }
- // Writer will create segments file for empty segments list
- $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, true);
+ // NOTE(review): appears unreachable - every path through the try/catch above returns or throws
+ return -1;
+ }
- if (!$this->_lock->lock(LOCK_SH)) {
- throw new Zend_Search_Lucene_Exception('Can\'t reduce lock level from Exclusive to Shared');
- }
- } else {
- // Wait if index is under switching from one set of segments to another (Index_Writer::_updateSegments())
- if (!$this->_lock->lock(LOCK_SH)) {
- throw new Zend_Search_Lucene_Exception('Can\'t obtain shared index lock');
- }
- $this->_writer = null;
+ /**
+ * Get segments file name
+ *
+ * Maps a generation number to the name of the segments file that describes it:
+ * generation 0 gives 'segments', any other value gives 'segments_' followed by
+ * the generation written in base 36.
+ *
+ * @param integer $generation
+ * @return string
+ */
+ public static function getSegmentFileName($generation)
+ {
+ // Generation 0 is the value getActualGeneration() documents for a pre-2.1 index
+ if ($generation == 0) {
+ return 'segments';
}
+ // base_convert() renders the decimal generation number with digits 0-9 and a-z
+ return 'segments_' . base_convert($generation, 10, 36);
+ }
+ /**
+ * Read segments file for pre-2.1 Lucene index format
+ */
+ private function _readPre21SegmentsFile()
+ {
$segmentsFile = $this->_directory->getFileObject('segments');
$format = $segmentsFile->readInt();
$segSize = $segmentsFile->readInt();
$this->_docCount += $segSize;
- $this->_segmentInfos[] =
- new Zend_Search_Lucene_Index_SegmentInfo($segName,
+ $this->_segmentInfos[$segName] =
+ new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+ $segName,
+ $segSize);
+ }
+ }
+
+ /**
+ * Read segments file
+ *
+ * Opens the segments file of the current generation, checks its format marker and
+ * registers one Zend_Search_Lucene_Index_SegmentInfo per listed segment in
+ * $this->_segmentInfos (keyed by segment name). $this->_docCount is reset and then
+ * accumulated from the segment sizes.
+ *
+ * @throws Zend_Search_Lucene_Exception on a wrong format marker or when a segment uses separate norm files
+ */
+ private function _readSegmentsFile()
+ {
+ $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+
+ $format = $segmentsFile->readInt();
+
+ // Only files starting with this format marker are accepted
+ if ($format != (int)0xFFFFFFFD) {
+ throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+ }
+
+ // read version
+ // $segmentsFile->readLong();
+ // (the version is consumed as two ints and discarded)
+ $segmentsFile->readInt(); $segmentsFile->readInt();
+
+ // read segment name counter
+ // (value is discarded here)
+ $segmentsFile->readInt();
+
+ // Number of segment records that follow
+ $segments = $segmentsFile->readInt();
+
+ $this->_docCount = 0;
+
+ // read segmentInfos
+ for ($count = 0; $count < $segments; $count++) {
+ $segName = $segmentsFile->readString();
+ $segSize = $segmentsFile->readInt();
+
+ // 2.1+ specific properties
+ //$delGen = $segmentsFile->readLong();
+ // The deletions generation is read as two ints (high part first) instead of one long
+ $delGenHigh = $segmentsFile->readInt();
+ $delGenLow = $segmentsFile->readInt();
+ if ($delGenHigh == (int)0xFFFFFFFF && $delGenLow == (int)0xFFFFFFFF) {
+ $delGen = -1; // There are no deletes
+ } else {
+ // NOTE(review): "<< 32" needs 64-bit PHP integers, and if readInt() returns a signed
+ // value a negative low part would spill into the high bits of the result - confirm
+ $delGen = ($delGenHigh << 32) | $delGenLow;
+ }
+
+ $hasSingleNormFile = $segmentsFile->readByte();
+ $numField = $segmentsFile->readInt();
+
+ // Any field count other than the 0xFFFFFFFF marker means per-field norm generations
+ // follow; they are read and then the index is rejected as unsupported
+ $normGens = array();
+ if ($numField != (int)0xFFFFFFFF) {
+ for ($count1 = 0; $count1 < $numField; $count1++) {
+ $normGens[] = $segmentsFile->readLong();
+ }
+
+ throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
+ }
+
+ $isCompound = $segmentsFile->readByte();
+
+
+ $this->_docCount += $segSize;
+
+ // Register the segment under its name
+ $this->_segmentInfos[$segName] =
+ new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+ $segName,
$segSize,
- $this->_directory);
+ $delGen,
+ $hasSingleNormFile,
+ $isCompound);
}
}
+    /**
+     * Opens the index.
+     *
+     * IndexReader constructor needs Directory as a parameter. It should be
+     * a string with a path to the index folder or a
+     * Zend_Search_Lucene_Storage_Directory_Filesystem object. A directory object
+     * supplied by the caller is not closed when the index is closed.
+     *
+     * A read lock is obtained on the directory for the lifetime of the index object.
+     * If opening (or creating) the index fails, every lock obtained here is released
+     * again before the exception is passed on to the caller.
+     *
+     * @param mixed   $directory  index folder path or filesystem directory object
+     * @param boolean $create     true to write a new, empty index generation before opening
+     * @throws Zend_Search_Exception         if no directory is specified
+     * @throws Zend_Search_Lucene_Exception  if the index can't be created or doesn't exist
+     */
+    public function __construct($directory = null, $create = false)
+    {
+        if ($directory === null) {
+            throw new Zend_Search_Exception('No index directory specified');
+        }
+
+        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
+            $this->_directory      = $directory;
+            $this->_closeDirOnExit = false;
+        } else {
+            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
+            $this->_closeDirOnExit = true;
+        }
+
+        $this->_segmentInfos = array();
+
+        // Mark index as "under processing" to prevent other processes from premature index cleaning
+        Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
+
+        // NOTE: read lock escalation around the opening sequence is currently disabled.
+
+        // When construction fails the caller never receives the object, so nothing else
+        // could release the read lock later: give it back here before re-throwing.
+        try {
+            $this->_generation = self::getActualGeneration($this->_directory);
+
+            if ($create) {
+                try {
+                    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
+                } catch (Zend_Search_Lucene_Exception $e) {
+                    if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
+                        throw $e;
+                    } else {
+                        throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
+                    }
+                }
+
+                // The write lock is held from here on; release it on failure as well as on success
+                try {
+                    if ($this->_generation == -1) {
+                        // Directory doesn't contain existing index, start from 1
+                        $this->_generation = 1;
+                        $nameCounter = 0;
+                    } else {
+                        // Directory contains existing index
+                        $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+                        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
+
+                        $nameCounter = $segmentsFile->readInt();
+                        $this->_generation++;
+                    }
+
+                    Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
+                } catch (Exception $e) {
+                    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
+                    throw $e;
+                }
+
+                Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
+            }
+
+            if ($this->_generation == -1) {
+                throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
+            } else if ($this->_generation == 0) {
+                $this->_readPre21SegmentsFile();
+            } else {
+                $this->_readSegmentsFile();
+            }
+        } catch (Exception $e) {
+            Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
+            throw $e;
+        }
+    }
+
+
/**
* Close current index and free resources
*/
$this->commit();
- // Free shared lock
- $this->_lock->unlock();
-
+ // Release "under processing" flag
+ Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
+
if ($this->_closeDirOnExit) {
$this->_directory->close();
}
return self::$_defaultSearchField;
}
+ /**
+ * Set result set limit.
+ *
+ * 0 (default) means no limit
+ *
+ * The value is stored in a static property, so it applies to every index object.
+ * The value is stored as given; no validation or type conversion is performed here.
+ *
+ * @param integer $limit
+ */
+ public static function setResultSetLimit($limit)
+ {
+ self::$_resultSetLimit = $limit;
+ }
+
+ /**
+ * Get result set limit.
+ *
+ * 0 means no limit
+ *
+ * Returns the value last passed to setResultSetLimit(), or 0 if it was never set.
+ *
+ * @return integer
+ */
+ public static function getResultSetLimit()
+ {
+ return self::$_resultSetLimit;
+ }
+
/**
* Retrieve index maxBufferedDocs option
*
$topScore = $docScore;
}
}
+
+ if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
+ break;
+ }
}
if (count($hits) == 0) {
// skip sorting, which may cause a error on empty index
- return array();
+ return array();
}
if ($topScore > 1) {
{
$this->getIndexWriter()->addDocument($document);
$this->_docCount++;
+
+ $this->_hasChanges = true;
}
foreach ($this->_segmentInfos as $segInfo) {
$segInfo->writeChanges();
}
-
- $this->_hasChanges = false;
- }
-
- if ($this->_writer !== null) {
- $this->_writer->commit();
-
+
+ $this->getIndexWriter()->commit();
+
$this->_updateDocCount();
+
+ $this->_hasChanges = false;
}
}
$result[] = $segmentInfo->currentTerm();
}
- $segmentInfo->nextTerm();
- // check, if segment dictionary is finished
- if ($segmentInfo->currentTerm() !== null) {
+ if ($segmentInfo->nextTerm() !== null) {
// Put segment back into the priority queue
$segmentInfoQueue->put($segmentInfo);
}
}
+ /**
+ * Terms stream queue
+ *
+ * @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue
+ */
+ private $_termsStreamQueue = null;
+
+ /**
+ * Last Term in a terms stream
+ *
+ * @var Zend_Search_Lucene_Index_Term
+ */
+ private $_lastTerm = null;
+
+    /**
+     * Reset terms stream.
+     *
+     * Builds a fresh priority queue from all segments that still have a current
+     * term after being reset, then positions the stream on its first term.
+     */
+    public function resetTermsStream()
+    {
+        $queue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
+        $this->_termsStreamQueue = $queue;
+
+        foreach ($this->_segmentInfos as $segment) {
+            $segment->reset();
+
+            if ($segment->currentTerm() === null) {
+                // "Empty" segment, nothing to stream from it
+                continue;
+            }
+
+            $queue->put($segment);
+        }
+
+        $this->nextTerm();
+    }
+
+    /**
+     * Skip terms stream up to specified term prefix.
+     *
+     * Prefix contains fully specified field info and portion of searched term.
+     * Every queued segment is taken out of the queue, advanced to the prefix and
+     * queued again only if it still has a current term.
+     *
+     * @param Zend_Search_Lucene_Index_Term $prefix
+     */
+    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
+    {
+        // Drain the queue first, so that re-queued segments are not popped again
+        $drained = array();
+        for ($segment = $this->_termsStreamQueue->pop();
+             $segment !== null;
+             $segment = $this->_termsStreamQueue->pop()) {
+            $drained[] = $segment;
+        }
+
+        foreach ($drained as $segment) {
+            $segment->skipTo($prefix);
+
+            if ($segment->currentTerm() === null) {
+                // Segment has no terms left at or after the prefix
+                continue;
+            }
+
+            $this->_termsStreamQueue->put($segment);
+        }
+
+        $this->nextTerm();
+    }
+
+    /**
+     * Scans terms dictionary and returns next term
+     *
+     * A term present in several segments is reported once: a popped segment is
+     * only reported when the segment now on top of the queue has a different term key.
+     *
+     * @return Zend_Search_Lucene_Index_Term|null  null when the stream is exhausted
+     */
+    public function nextTerm()
+    {
+        while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
+            $isNewTerm = $this->_termsStreamQueue->top() === null
+                      || $this->_termsStreamQueue->top()->currentTerm()->key() != $segmentInfo->currentTerm()->key();
+
+            if ($isNewTerm) {
+                // Remember the term before the segment is advanced
+                $this->_lastTerm = $segmentInfo->currentTerm();
+            }
+
+            // Advance the segment; it goes back into the priority queue unless it is finished
+            if ($segmentInfo->nextTerm() !== null) {
+                $this->_termsStreamQueue->put($segmentInfo);
+            }
+
+            if ($isNewTerm) {
+                return $this->_lastTerm;
+            }
+        }
+
+        // End of stream
+        $this->_lastTerm = null;
+
+        return null;
+    }
+
+ /**
+ * Returns term in current position
+ *
+ * This is the term most recently produced by nextTerm(); it is null once the
+ * stream has reached its end or has been closed.
+ *
+ * @return Zend_Search_Lucene_Index_Term|null
+ */
+ public function currentTerm()
+ {
+ return $this->_lastTerm;
+ }
+
+    /**
+     * Close terms stream
+     *
+     * Should be used for resources clean up if stream is not read up to the end.
+     * Closes the terms stream of every segment still queued and clears the stream state.
+     * Calling it when no stream is open (never reset, or already closed) is a no-op.
+     */
+    public function closeTermsStream()
+    {
+        // The queue is null until resetTermsStream() is called and again after a close
+        if ($this->_termsStreamQueue !== null) {
+            while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
+                $segmentInfo->closeTermsStream();
+            }
+        }
+
+        $this->_termsStreamQueue = null;
+        $this->_lastTerm         = null;
+    }
+
+
/*************************************************************************
@todo UNIMPLEMENTED
*************************************************************************/
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Analysis_Token */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Token.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Token.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php";
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php";
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php";
/** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php";
/** Zend_Search_Lucene_Analysis_TokenFilter_StopWords */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
/** Zend_Search_Lucene_Analysis_TokenFilter_ShortWords */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php";
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
* @param string $data
* @return array
*/
- public function tokenize($data, $encoding = 'UTF-8')
+ public function tokenize($data, $encoding = '')
{
$this->setInput($data, $encoding);
+
$tokenList = array();
while (($nextToken = $this->nextToken()) !== null) {
$tokenList[] = $nextToken;
public static function getDefault()
{
if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
- self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
+ self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
}
return self::$_defaultImpl;
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
}
// convert input into ascii
- $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
+ //$this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
+ $this->_input = mb_convert_encoding($this->_input, 'ASCII', 'auto');
+
$this->_encoding = 'ASCII';
}
if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
// It covers both cases a) there are no matches (preg_match(...) === 0)
// b) error occured (preg_match(...) === FALSE)
- return null;
+ return null;
}
$str = $match[0][0];
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Analysis
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
* @var integer
*/
private $_bytePosition;
-
+
/**
- * Stream length
+ * Object constructor
*
- * @var integer
+ * @throws Zend_Search_Lucene_Exception
*/
- private $_streamLength;
+    public function __construct()
+    {
+        // Probe PCRE with a unicode property pattern; warnings are suppressed because
+        // the probe is expected to fail when unicode support is missing
+        $unicodeProbe = @preg_match('/\pL/u', 'a');
+        if ($unicodeProbe == 1) {
+            return;
+        }
+
+        // PCRE unicode support is turned off
+        require_once 'Zend/Search/Lucene/Exception.php';
+        throw new Zend_Search_Lucene_Exception('Utf8 analyzer needs PCRE unicode support to be enabled.');
+    }
/**
* Reset token stream
// convert input into UTF-8
if (strcasecmp($this->_encoding, 'utf8' ) != 0 &&
strcasecmp($this->_encoding, 'utf-8') != 0 ) {
- $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input);
+ $this->_input = @iconv($this->_encoding, 'UTF-8', $this->_input);
$this->_encoding = 'UTF-8';
}
-
- // Get UTF-8 string length.
- // It also checks if it's a correct utf-8 string
- $this->_streamLength = iconv_strlen($this->_input, 'UTF-8');
- }
-
- /**
- * Check, that character is a letter
- *
- * @param string $char
- * @return boolean
- */
- private static function _isAlpha($char)
- {
- if (strlen($char) > 1) {
- // It's an UTF-8 character
- return true;
- }
-
- return ctype_alpha($char);
- }
-
- /**
- * Get next UTF-8 char
- *
- * @param string $char
- * @return boolean
- */
- private function _nextChar()
- {
- $char = $this->_input[$this->_bytePosition++];
-
- if (( ord($char) & 0xC0 ) == 0xC0) {
- $addBytes = 1;
- if (ord($char) & 0x20 ) {
- $addBytes++;
- if (ord($char) & 0x10 ) {
- $addBytes++;
- }
- }
- $char .= substr($this->_input, $this->_bytePosition, $addBytes);
- $this->_bytePosition += $addBytes;
- }
-
- $this->_position++;
-
- return $char;
}
/**
return null;
}
- while ($this->_position < $this->_streamLength) {
- // skip white space
- while ($this->_position < $this->_streamLength &&
- !self::_isAlpha($char = $this->_nextChar())) {
- $char = '';
- }
-
- $termStartPosition = $this->_position - 1;
- $termText = $char;
-
- // read token
- while ($this->_position < $this->_streamLength &&
- self::_isAlpha($char = $this->_nextChar())) {
- $termText .= $char;
- }
-
- // Empty token, end of stream.
- if ($termText == '') {
+ do {
+ if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
+ // It covers both cases a) there are no matches (preg_match(...) === 0)
+ // b) error occurred (preg_match(...) === FALSE)
return null;
}
- $token = new Zend_Search_Lucene_Analysis_Token(
- $termText,
- $termStartPosition,
- $this->_position - 1);
- $token = $this->normalize($token);
- if ($token !== null) {
- return $token;
- }
- // Continue if token is skipped
- }
-
- return null;
+ // matched string
+ $matchedWord = $match[0][0];
+
+ // binary position of the matched word in the input stream
+ $binStartPos = $match[0][1];
+
+ // character position of the matched word in the input stream
+ $startPos = $this->_position +
+ iconv_strlen(substr($this->_input,
+ $this->_bytePosition,
+ $binStartPos - $this->_bytePosition),
+ 'UTF-8');
+ // character position of the end of matched word in the input stream
+ $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
+
+ $this->_bytePosition = $binStartPos + strlen($matchedWord);
+ $this->_position = $endPos;
+
+ $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
+ } while ($token === null); // try again if token is skipped
+
+ return $token;
}
}
--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_TokenFilter */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Analysis/TokenFilter.php";
+
+
+/**
+ * Token filter that lower-cases term text using the UTF-8 aware mbstring functions.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 extends Zend_Search_Lucene_Analysis_TokenFilter
+{
+    /**
+     * Object constructor
+     *
+     * @throws Zend_Search_Lucene_Exception if the mbstring extension is not available
+     */
+    public function __construct()
+    {
+        if (function_exists('mb_strtolower')) {
+            return;
+        }
+
+        // mbstring extension is disabled
+        global $CFG;
+        require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php";
+        throw new Zend_Search_Lucene_Exception('Utf8 compatible lower case filter needs mbstring extension to be enabled.');
+    }
+
+    /**
+     * Normalize Token or remove it (if null is returned)
+     *
+     * This filter never removes a token: it returns a new token carrying the
+     * lower-cased term text with the offsets and position increment of the source token.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $srcToken
+     * @return Zend_Search_Lucene_Analysis_Token
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
+    {
+        $loweredText = mb_strtolower($srcToken->getTermText(), 'UTF-8');
+
+        $loweredToken = new Zend_Search_Lucene_Analysis_Token(
+            $loweredText,
+            $srcToken->getStartOffset(),
+            $srcToken->getEndOffset()
+        );
+        $loweredToken->setPositionIncrement($srcToken->getPositionIncrement());
+
+        return $loweredToken;
+    }
+}
+
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Document
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Field */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Field.php';
+require_once "Zend/Search/Lucene/Field.php";
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Document
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Document
* @param $offset
* @return string
*/
- public function __get($offset)
- {
- return $this->getFieldValue($offset);
- }
+ public function __get($offset)
+ {
+ return $this->getFieldValue($offset);
+ }
/**
*/
public function getFieldNames()
{
- return array_keys($this->_fields);
+ return array_keys($this->_fields);
}
*/
public function getFieldValue($fieldName)
{
- return $this->getField($fieldName)->value;
+ return $this->getField($fieldName)->value;
}
/**
*/
public function getFieldUtf8Value($fieldName)
{
- return $this->getField($fieldName)->getUtf8Value();
+ return $this->getField($fieldName)->getUtf8Value();
}
}
*
* @category Zend
* @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Framework base exception
*/
-require_once $CFG->dirroot.'/search/Zend/Search/Exception.php';
+require_once "Zend/Search/Exception.php";
/**
* @category Zend
* @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Exception extends Zend_Search_Exception
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query.php";
/** Zend_Search_Lucene_Search_Weight_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Weight/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Weight/Term.php";
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
* @param Zend_Search_Lucene_Index_Term $term
* @param boolean $sign
*/
- public function __construct($term)
+ public function __construct(Zend_Search_Lucene_Index_Term $term)
{
$this->_term = $term;
}
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Index/Term.php";
/** Zend_Search_Lucene_Search_Query_Term */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Term.php";
/** Zend_Search_Lucene_Search_Query_MultiTerm */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/MultiTerm.php";
/** Zend_Search_Lucene_Search_Query_Boolean */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Boolean.php";
/** Zend_Search_Lucene_Search_Query_Phrase */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Phrase.php";
+
+/** Zend_Search_Lucene_Search_Query_Wildcard */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Wildcard.php";
+
+/** Zend_Search_Lucene_Search_Query_Range */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Range.php";
+
+/** Zend_Search_Lucene_Search_Query_Fuzzy */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Fuzzy.php";
/** Zend_Search_Lucene_Search_Query_Empty */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Empty.php";
+
+/** Zend_Search_Lucene_Search_Query_Insignificant */
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/Query/Insignificant.php";
/** Zend_Search_Lucene_Search_QueryLexer */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryLexer.php";
/** Zend_Search_Lucene_Search_QueryParserContext */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryParserContext.php";
/** Zend_Search_Lucene_FSM */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/FSM.php";
/** Zend_Search_Lucene_Exception */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Exception.php";
/** Zend_Search_Lucene_Search_QueryParserException */
-require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
+require_once "{$CFG->dirroot}/search/Zend/Search/Lucene/Search/QueryParserException.php";
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
- * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
*/
private $_defaultEncoding = '';
+ /**
+ * Defines query parsing mode.
+ *
+ * If this option is turned on, the query parser suppresses query parser exceptions
+ * and constructs a multi-term query using all words from the query.
+ *
+ * That helps to avoid exceptions caused by queries which don't conform to the query language,
+ * but limits the ability to detect that a query entered by the user has some inconsistencies.
+ *
+ *
+ * Default is true.
+ *
+ * Use {@link Zend_Search_Lucene_Search_QueryParser::suppressQueryParsingExceptions()},
+ * {@link Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions()} and
+ * {@link Zend_Search_Lucene_Search_QueryParser::queryParsingExceptionsSuppressed()} to operate
+ * with this setting.
+ *
+ * @var boolean
+ */
+ private $_suppressQueryParsingExceptions = true;
/**
* Boolean operators constants
$this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
}
+ /**
+ * Get query parser instance
+ *
+ * @return Zend_Search_Lucene_Search_QueryParser
+ */
+ private static function _getInstance()
+ {
+ if (self::$_instance === null) {
+ self::$_instance = new self();
+ }
+ return self::$_instance;
+ }
/**
* Set query string default encoding
*/
public static function setDefaultEncoding($encoding)
{
- if (self::$_instance === null) {
- self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
- }
-
- self::$_instance->_defaultEncoding = $encoding;
+ self::_getInstance()->_defaultEncoding = $encoding;
}
/**
*/
public static function getDefaultEncoding()
{
- if (self::$_instance === null) {
- self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
- }
-
- return self::$_instance->_defaultEncoding;
+ return self::_getInstance()->_defaultEncoding;
}
/**
*/
public static function setDefaultOperator($operator)
{
- if (self::$_instance === null) {
- self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
- }
-
- self::$_instance->_defaultOperator = $operator;
+ self::_getInstance()->_defaultOperator = $operator;
}
/**
*/
public static function getDefaultOperator()
{
- if (self::$_instance === null) {
- self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
- }
+ return self::_getInstance()->_defaultOperator;
+ }
- return self::$_instance->_defaultOperator;
+ /**
+ * Turn on 'suppress query parser exceptions' mode.
+ */
+ public static function suppressQueryParsingExceptions()
+ {
+ self::_getInstance()->_suppressQueryParsingExceptions = true;
+ }
+ /**
+ * Turn off 'suppress query parser exceptions' mode.
+ */
+ public static function dontSuppressQueryParsingExceptions()
+ {
+ self::_getInstance()->_suppressQueryParsingExceptions = false;
}
+ /**
+ * Check 'suppress query parser exceptions' mode.
+ * @return boolean
+ */
+ public static function queryParsingExceptionsSuppressed()
+ {
+ return self::_getInstance()->_suppressQueryParsingExceptions;
+ }
+
+
/**
* Parses a query string
*
*/
public static function parse($strQuery, $encoding = null)
{
- if (self::$_instance === null) {
- self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
- }
-
- self::$_instance->_encoding = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
- self::$_instance->_lastToken = null;
- self::$_instance->_context = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
- self::$_instance->_contextStack = array();
- self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
-
- // Empty query
- if (count(self::$_instance->_tokens) == 0) {
- return new Zend_Search_Lucene_Search_Query_Empty();
- }
-
-
- foreach (self::$_instance->_tokens as $token) {
- try {
- self::$_instance->_currentToken = $token;
- self::$_instance->process($token->type);
-
- self::$_instance->_lastToken = $token;
- } catch (Exception $e) {
- if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
- throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+ self::_getInstance();
+
+ // Reset the FSM in case a previous parse operation did not return it to a correct state
+ self::$_instance->reset();
+
+ try {
+ self::$_instance->_encoding = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
+ self::$_instance->_lastToken = null;
+ self::$_instance->_context = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
+ self::$_instance->_contextStack = array();
+ self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
+
+ // Empty query
+ if (count(self::$_instance->_tokens) == 0) {
+ return new Zend_Search_Lucene_Search_Query_Insignificant();
+ }
+
+
+ foreach (self::$_instance->_tokens as $token) {
+ try {
+ self::$_instance->_currentToken = $token;
+ self::$_instance->process($token->type);
+
+ self::$_instance->_lastToken = $token;
+ } catch (Exception $e) {
+ if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
+ throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
+ }
+
+ throw $e;
}
-
+ }
+
+ if (count(self::$_instance->_contextStack) != 0) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
+ }
+
+ return self::$_instance->_context->getQuery();
+ } catch (Zend_Search_Lucene_Search_QueryParserException $e) {
+ if (self::$_instance->_suppressQueryParsingExceptions) {
+ $queryTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($strQuery, self::$_instance->_encoding);
+
+ $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
+ $termsSign = (self::$_instance->_defaultOperator == self::B_AND) ? true /* required term */ :
+ null /* optional term */;
+
+ foreach ($queryTokens as $token) {
+ $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()), $termsSign);
+ }
+
+
+ return $query;
+ } else {
throw $e;
}
}
-
- if (count(self::$_instance->_contextStack) != 0) {
- throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
- }
-
- return self::$_instance->_context->getQuery();
}
default:
// It's not a user input exception
- throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position .' );
+ throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
}
*/
public function openedRQLastTerm()
{
- throw new Zend_Search_Lucene_Search_QueryParserException('Range queries are not supported yet.');
+ $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
+ if (count($tokens) > 1) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+ } else if (count($tokens) == 1) {
+ $from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+ } else {
+ $from = null;
+ }
- // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm, $this->_context->getField());
- // $lastTerm = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+ $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
+ if (count($tokens) > 1) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+ } else if (count($tokens) == 1) {
+ $to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+ } else {
+ $to = null;
+ }
- // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, false);
- // $this->_context->addentry($query);
+ if ($from === null && $to === null) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
+ }
+
+ $rangeQuery = new Zend_Search_Lucene_Search_Query_Range($from, $to, false);
+ $entry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($rangeQuery);
+ $this->_context->addEntry($entry);
}
/**
*/
public function closedRQLastTerm()
{
- throw new Zend_Search_Lucene_Search_QueryParserException('Range queries are not supported yet.');
+ $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
+ if (count($tokens) > 1) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+ } else if (count($tokens) == 1) {
+ $from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+ } else {
+ $from = null;
+ }
- // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm, $this->_context->getField());
- // $lastTerm = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
+ $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
+ if (count($tokens) > 1) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
+ } else if (count($tokens) == 1) {
+ $to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
+ } else {
+ $to = null;
+ }
- // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, true);
- // $this->_context->addentry($query);
+ if ($from === null && $to === null) {
+ throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
+ }
+
+ $rangeQuery = new Zend_Search_Lucene_Search_Query_Range($from, $to, true);
+ $entry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($rangeQuery);
+ $this->_context->addEntry($entry);
}
}