+2006/09/08
+----------
+Google Summer of Code is finished. I spent a couple of weeks away from
+the project to think about it and also to take a break. Working on it
+now, I discovered bugs in the query parser (now fixed), and I also
+un-convoluted the querylib logic (well, slightly).
+
+Updated ZFS files to latest SVN.
+
2006/08/21
----------
Fixed index document count, and created new config variable to store
We are running cutting-edge (i.e. HEAD) Zend Framework:
URL: http://framework.zend.com/svn/framework/trunk
- Revision: 924
- Last Changed Rev: 924
- Last Changed Date: 2006-07-27 10:23:04 +0200 (Thu, 27 Jul 2006)
-
+ Revision: 1042
+ Last Changed Rev: 1042
+ Last Changed Date: 2006-09-07 23:14:50 +0200 (Thu, 07 Sep 2006)
This Zend Framework present in this directory only contains the minimum
to run Zend_Search_Lucene - I don't foresee any problems, since the license
*/
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
/**
* @category Zend
* @package Zend_Search_Lucene
* and advances the file pointer.
*
* @return integer
+ * @throws Zend_Search_Lucene_Exception
*/
public function readLong()
{
$str = $this->_fread(8);
/**
- * PHP uses long as largest integer. fseek() uses long for offset.
- * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
- * conversion to float.
- * So, largest index segment file is 2Gb
+ * The 8 bytes just read are combined most-significant byte first.
+ * Check whether we are running in 64-bit mode (PHP_INT_SIZE > 4); if so, all eight bytes are used.
+ * fseek() uses long for offset. Thus, the largest index segment file size in 32-bit mode is 2Gb,
+ * and any stored value that does not fit into a signed 32-bit integer raises an exception.
*/
- return /* ord($str{0}) << 56 | */
- /* ord($str{1}) << 48 | */
- /* ord($str{2}) << 40 | */
- /* ord($str{3}) << 32 | */
- ord($str{4}) << 24 |
- ord($str{5}) << 16 |
- ord($str{6}) << 8 |
- ord($str{7});
+ if (PHP_INT_SIZE > 4) {
+ return ord($str[0]) << 56 |
+ ord($str[1]) << 48 |
+ ord($str[2]) << 40 |
+ ord($str[3]) << 32 |
+ ord($str[4]) << 24 |
+ ord($str[5]) << 16 |
+ ord($str[6]) << 8 |
+ ord($str[7]);
+ } else {
+ // The four high-order bytes must all be zero, and the top bit of byte 4
+ // must be clear: with that bit set, ord($str[4]) << 24 would overflow a
+ // signed 32-bit integer and the result would come back negative.
+ if ((ord($str[0]) != 0) ||
+ (ord($str[1]) != 0) ||
+ (ord($str[2]) != 0) ||
+ (ord($str[3]) != 0) ||
+ ((ord($str[4]) & 0x80) != 0)) {
+ throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+ }
+
+ return ord($str[4]) << 24 |
+ ord($str[5]) << 16 |
+ ord($str[6]) << 8 |
+ ord($str[7]);
+ }
}
/**
* Writes long integer to the end of file
*
* @param integer $value
+ * @throws Zend_Search_Lucene_Exception
*/
public function writeLong($value)
{
/**
- * PHP uses long as largest integer. fseek() uses long for offset.
- * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
- * conversion to float.
- * So, largest index segment file is 2Gb
+ * Check whether we are running in 64-bit mode (PHP_INT_SIZE > 4).
+ * In 64-bit mode the value is cast to integer and written as 8 bytes, most-significant byte first.
+ * fseek() and ftell() use long for offset. Thus, the largest index segment file size in 32-bit mode is 2Gb:
+ * there, values above 0x7FFFFFFF raise an exception and the four high-order bytes are written as zero.
*/
- settype($value, 'integer');
- $this->_fwrite( "\x00\x00\x00\x00" .
- chr($value>>24 & 0xFF) .
- chr($value>>16 & 0xFF) .
- chr($value>>8 & 0xFF) .
- chr($value & 0xFF), 8 );
+ if (PHP_INT_SIZE > 4) {
+ settype($value, 'integer');
+ $this->_fwrite( chr($value>>56 & 0xFF) .
+ chr($value>>48 & 0xFF) .
+ chr($value>>40 & 0xFF) .
+ chr($value>>32 & 0xFF) .
+ chr($value>>24 & 0xFF) .
+ chr($value>>16 & 0xFF) .
+ chr($value>>8 & 0xFF) .
+ chr($value & 0xFF), 8 );
+ } else {
+ if ($value > 0x7FFFFFFF) {
+ throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+ }
+
+ $this->_fwrite( "\x00\x00\x00\x00" .
+ chr($value>>24 & 0xFF) .
+ chr($value>>16 & 0xFF) .
+ chr($value>>8 & 0xFF) .
+ chr($value & 0xFF), 8 );
+ }
}
//otherwise we are dealing with a new advanced query
unset($_SESSION['search_advanced_query']);
session_unregister('search_advanced_query');
+
+ //chars to strip from both ends of strings (whitespace, NUL, ',', '-' and '+')
+ $chars = " \t\n\r\0\x0B,-+";
//retrieve advanced query variables
$adv->mustappear = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars);
//parse the advanced variables into a query string
//TODO: move out to external query class (QueryParse?)
- //chars to strip from strings (whitespace)
- $chars = ' \t\n\r\0\x0B,;';
$query_string = '';
//get all available module types
} //if
//run the query against the index
- $sq = new SearchQuery($query_string, $page_number, 10, true);
+ $sq = new SearchQuery($query_string, $page_number, 10, false);
} //if
if (!$site = get_site()) {
$validquery,
$validindex,
$results,
- $results_per_page;
+ $results_per_page,
+ $total_results;
public function __construct($term='', $page=1, $results_per_page=10, $cache=false) {
global $CFG;
} //set_query
public function results() {
- if ($this->validquery and $this->validindex) {
- return $this->get_subset_results();
- } else {
- return array();
- } //else
+ return $this->results;
} //results
-
- private function get_subset_results() {
- if ($this->count() < $this->results_per_page) {
- $this->pagenumber = 1;
- } else if ($this->pagenumber > $this->total_pages()) {
- $this->pagenumber = $this->total_pages();
- } //if
-
- $start = ($this->pagenumber - 1) * $this->results_per_page;
-
- return array_slice($this->results, $start, $this->results_per_page);
- } //get_results
-
- private function get_all_results() {
+
+ private function process_results($all=false) {
global $USER;
+
+ $term = strtolower($this->term);
- $resultdoc = new SearchResult();
- $resultdocs = array();
- $i = 0;
+ //experimental - return more results
+ $strip_arr = array('author:', 'title:', '+', '-', 'doctype:');
+ $stripped_term = str_replace($strip_arr, '', $term);
- $term = strtolower($this->term);
+ $hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term);
+ //--
- $hits = $this->index->find($term." title:".$term." author:".$term);
+ $hitcount = count($hits);
+ $this->total_results = $hitcount;
- foreach ($hits as $hit) {
+ if ($hitcount == 0) return array();
+
+ $totalpages = ceil($hitcount/$this->results_per_page);
+
+ if (!$all) {
+ if ($hitcount < $this->results_per_page) {
+ $this->pagenumber = 1;
+ } else if ($this->pagenumber > $totalpages) {
+ $this->pagenumber =$totalpages;
+ } //if
+
+ $start = ($this->pagenumber - 1) * $this->results_per_page;
+ $end = $start + $this->results_per_page;
+
+ if ($end > $hitcount) {
+ $end = $hitcount;
+ } //if
+ } else {
+ $start = 0;
+ $end = $hitcount;
+ } //else
+
+ $resultdoc = new SearchResult();
+ $resultdocs = array();
+
+ for ($i = $start; $i < $end; $i++) {
+ $hit = $hits[$i];
+
//check permissions on each result
if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) {
$resultdoc->number = $i;
$resultdoc->author = $hit->author;
//and store it
- $resultdocs[] = clone($resultdoc);
-
- $i++;
+ $resultdocs[] = clone($resultdoc);
} //if
} //foreach
-
+
return $resultdocs;
- } //get_all_results
+ } //process_results
private function get_results() {
$cache = new SearchCache();
if ($this->cache and $cache->can_cache()) {
if (!($resultdocs = $cache->cache($this->term))) {
- $resultdocs = $this->get_all_results();
+ $resultdocs = $this->process_results();
//cache the results so we don't have to compute this on every page-load
$cache->cache($this->term, $resultdocs);
//print "Using new results.";
} else {
//no caching :(
//print "Caching disabled!";
- $resultdocs = $this->get_all_results();
+ $resultdocs = $this->process_results();
} //else
return $resultdocs;
} //can_display
public function count() {
- return count($this->results);
+ return $this->total_results;
} //count
- //this shouldn't be in this class
- //public function index_count() {
- // return $this->index->count();
- //} //index_count
-
public function is_valid() {
return ($this->validquery and $this->validindex);
} //is_valid