+2006/07/25
+----------
+Query logic moved into the SearchQuery class in querylib.php. Should be able
+to include this file in any page and run a query against the index (PHP 5
+checks must be added to those pages then, though).
+
+Index info can be retrieved using the IndexInfo class in indexlib.php.
+
+Abstracted some stuff away, to reduce redundancy and decrease the
+likelihood of errors. Improved the stats.php page to include some
+diagnostics for administrators.
+
+delete.php skeleton created for removing deleted documents from the
+index. cron.php will contain the logic for running delete.php,
+update.php and eventually add.php.
+
2006/07/11
----------
(Warning: It took me 1900 seconds to index the forum, go make coffee
-whilst you wait.)
+whilst you wait.) [Moodle.org forum data]
Forum search functions changed to use 'get_recordset' instead of
'get_records', for speed reasons. This provides a significant improvement,
$freqPointer = $prevTermInfo->freqPointer;
$proxPointer = $prevTermInfo->proxPointer;
for ($count = $prevPosition*$indexInterval + 1;
- $count < $termCount &&
+ $count <= $termCount &&
( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
($this->_getFieldPosition($termFieldNum) == $searchDicField &&
strcmp($termValue, $term->text) < 0) );
$currentToken = '';
for ($count = 0; $count < strlen($inputString); $count++) {
- if (ctype_alnum( $inputString{$count} )) {
+ if (ctype_alnum( $inputString{$count} ) ||
+ $inputString{$count} == '_') {
+ $currentToken .= $inputString{$count};
+ } else if ($inputString{$count} == '\\') { // Escaped character
+ $count++;
+
+ if ($count == strlen($inputString)) {
+ throw new Zend_Search_Lucene_Exception('Non finished escape sequence.');
+ }
+
$currentToken .= $inputString{$count};
} else {
// Previous token is finished
* extend.
* */
- class SearchDocument extends Zend_Search_Lucene_Document {
+ abstract class SearchDocument extends Zend_Search_Lucene_Document {
public function __construct(&$doc, &$data, $document_type, $course_id, $group_id) {
$this->addField(Zend_Search_Lucene_Field::Keyword('id', $doc->id));
$this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title));
class ForumSearchDocument extends SearchDocument {
public function __construct(&$post, $forum_id, $course_id, $group_id) {
- // generic information
- /*$doc->id = $post->id;
- $doc->title = $post->subject;
- $doc->author = $post->firstname." ".$post->lastname;
- $doc->contents = $post->message;*/
-
+ // generic information
$doc->id = $post['id'];
$doc->title = $post['subject'];
$doc->author = $post['firstname']." ".$post['lastname'];
$data->forum = $forum_id;
$data->discussion = $post['discussion'];
- parent::__construct($doc, $data, SEARCH_FORUM_TYPE, $course_id, $group_id);
+ parent::__construct($doc, $data, SEARCH_TYPE_FORUM, $course_id, $group_id);
} //constructor
} //ForumSearchDocument
} //forum_make_link
function forum_iterator() {
- //no @ = Undefined index: 82 in /home/michael/public_html/moodle/lib/datalib.php on line 2671
+ //no @ = Undefined index: 82 in moodle/lib/datalib.php on line 2671
return @get_all_instances_in_courses("forum", get_courses());
} //forum_iterator
return $documents;
} //forum_get_content_for_index
- //old slower version
- function forum_get_content_for_index_old(&$forum) {
- $documents = array();
- if (!$forum) return $documents;
-
- $posts = forum_get_discussions($forum->id);
- if (!$posts) return $documents;
-
- foreach($posts as $post) {
- if (is_object($post)) {
- if (strlen($post->message) > 0 && ($post->deleted != 1)) {
- $documents[] = new ForumSearchDocument($post, $forum->id, $forum->course, $post->groupid);
- } //if
-
- if ($children = forum_get_child_posts($post->id, $forum->id)) {
- foreach ($children as $child) {
- if (strlen($child->message) > 0 && ($child->deleted != 1)) {
- $documents[] = new ForumSearchDocument($child, $forum->id, $forum->course, $post->groupid);
- } //if
- } //foreach
- } //if
- } //if
- } //foreach
-
- return $documents;
- } //forum_get_content_for_index_old
-
//reworked faster version from /mod/forum/lib.php
function forum_get_discussions_fast($forum) {
global $CFG, $USER;
$data->wiki = $wiki_id;
// construct the parent class
- parent::__construct($doc, $data, SEARCH_WIKI_TYPE, $course_id, $group_id);
+ parent::__construct($doc, $data, SEARCH_TYPE_WIKI, $course_id, $group_id);
} //constructor
} //WikiSearchDocument
//php5 found, continue including php5-only files
require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
-
- if (get_config("search_indexer_busy") == 1) {
- } //if
-
- //turn on busy flag
- set_config("search_indexer_busy", 1);
+
mtrace('<pre>Server Time: '.date('r',time())."\n");
+
+ if ($CFG->search_indexer_busy == '1') {
+ //means indexing was not finished previously
+ mtrace("Warning: Indexing was not successfully completed last time, restarting.\n");
+ } //if
+
+ //turn on busy flag
+ set_config('search_indexer_busy', '1');
//paths
- $index_path = $CFG->dataroot.'/search';
+ $index_path = SEARCH_INDEX_PATH;
$index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
//setup directory in data root
if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
foreach ($mods as $mod) {
+ if ($mod->name == 'forum') continue;
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
if (file_exists($class_file)) {
mtrace('</pre>');
//finished, turn busy flag off
- set_config("search_indexer_busy", 0);
+ set_config("search_indexer_busy", "0");
+
+ //mark the time we last updated
+ set_config("search_indexer_run_date", time());
?>
\ No newline at end of file
$phpversion = phpversion();
mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
exit(0);
- } //if
-
- $index_path = "$CFG->dataroot/search";
- $index_dir = get_directory_list($index_path, '', false, false);
- $index_filecount = count($index_dir);
-
- //check if the table exists in the db
- $tables = $db->MetaTables();
-
- if (in_array($CFG->prefix.'search_documents', $tables)) {
- $db_count = count_records('search_documents');
- } else {
- $db_count = 0;
- } //else
+ } //if
- //TODO: elaborate on error messages, when db!=0 and index=0 -> corrupt, etc.
- if ($index_filecount != 0 or $db_count != 0) {
- mtrace("<pre>The data directory ($index_path) contains $index_filecount files, and\n"
- ."there are $db_count records in the <em>search_documents</em> table.\n"
+ require_once("$CFG->dirroot/search/indexlib.php");
+ $indexinfo = new IndexInfo();
+
+ if ($indexinfo->valid()) {
+ mtrace("<pre>The data directory ($indexinfo->path) contains $indexinfo->filecount files, and\n"
+ ."there are ".$indexinfo->dbcount." records in the <em>search_documents</em> table.\n"
."\n"
- ."This indicates that you have already succesfully indexed this site, or at least\n"
- ."started and cancelled an indexing session. Follow the link if you are sure that\n"
- ."you want to continue indexing - this will replace any existing index data (no\n"
- ."Moodle data is affected).\n"
+ ."This indicates that you have already succesfully indexed this site. Follow the link\n"
+ ."if you are sure that you want to continue indexing - this will replace any existing\n"
+ ."index data (no Moodle data is affected).\n"
."\n"
."You are encouraged to use the 'Test indexing' script before continuing onto\n"
."indexing - this will check if the modules are set up correctly. Please correct\n"
<?php
- //Move this stuff to lib/searchlib.php?
- // Author: Michael Champanis
-
- //document types that can be searched
- define('SEARCH_NO_TYPE', 'none');
- define('SEARCH_WIKI_TYPE', 'wiki');
- define('SEARCH_FORUM_TYPE', 'forum');
+ /* Move this stuff to lib/searchlib.php?
+ * Author: Michael Champanis
+ *
+ * This file must not contain any PHP 5, because it is used to test for PHP 5
+ * itself, and needs to be able to be executed on PHP 4 installations.
+ * */
+
+ define('SEARCH_INDEX_PATH', "$CFG->dataroot/search");
+
+ //document types that can be searched
+ define('SEARCH_TYPE_NONE', 'none');
+ define('SEARCH_TYPE_WIKI', 'wiki');
+ define('SEARCH_TYPE_FORUM', 'forum');
//returns all the document type constants
- function search_get_document_types() {
- $r = Array(SEARCH_WIKI_TYPE, SEARCH_NO_TYPE, SEARCH_FORUM_TYPE);
- return $r;
+ function search_get_document_types($prefix='SEARCH_TYPE') {
+ $ret = array();
+
+ foreach (get_defined_constants() as $key=>$value) {
+ if (substr($key, 0, strlen($prefix)) == $prefix) {
+ $ret[$key] = $value;
+ } //if
+ } //foreach
+
+ return $ret;
} //search_get_document_types
-
+
//shortens a url so it can fit on the results page
function search_shorten_url($url, $length=30) {
return substr($url, 0, $length)."...";
* All articles written by Helen Foster
*
* */
-
- require_once('../config.php');
+
+ require_once('../config.php');
require_once("$CFG->dirroot/search/lib.php");
- //check for php5, but don't die yet (see line 27)
- if ($check = search_check_php5()) {
- require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+ //check for php5, but don't die yet (see line 52)
+ if ($check = search_check_php5()) {
+ require_once("$CFG->dirroot/search/querylib.php");
$query_string = optional_param('query_string', '', PARAM_CLEAN);
$page_number = optional_param('page', 1, PARAM_INT);
if ($page_number < 1) {
$page_number = 1;
} //if
-
- $index_path = "$CFG->dataroot/search";
- $no_index = false; //optimism!
- $results_per_page = 10;
- try {
- $index = new Zend_Search_Lucene($index_path, false);
- } catch(Exception $e) {
- //print $e;
- $no_index = true;
- } //catch
+ $sq = new SearchQuery($query_string, $page_number, 10, true);
} //if
-
- //Result document class that contains all the display information we need
- class ResultDocument {
- public $url,
- $title,
- $score,
- $doctype,
- $author;
- } //ResultDocument
-
- //generates an HTML string of links to result pages
- function page_numbers($query, $hits, $page=1, $results_per_page=20) {
- //total result pages
- $pages = ceil($hits/$results_per_page);
-
- $ret = "<div align='center'>";
-
- //Back is disabled if we're on page 1
- if ($page > 1) {
- $ret .= "<a href='query.php?query_string=$query&page=".($page-1)."'>< Back</a> ";
- } else {
- $ret .= "< Back ";
- } //else
-
- //don't <a href> the current page
- for ($i = 1; $i <= $pages; $i++) {
- if ($page == $i) {
- $ret .= "[$i] ";
- } else {
- $ret .= "<a href='query.php?query_string=$query&page=$i'>$i</a> ";
- } //else
- } //for
-
- //Next disabled if we're on the last page
- if ($page < $pages) {
- $ret .= "<a href='query.php?query_string=$query&page=".($page+1)."'>Next ></a> ";
- } else {
- $ret .= "Next > ";
- } //else
-
- $ret .= "</div>";
-
- //shorten really long page lists, to stop table distorting width-ways
- if (strlen($ret) > 70) {
- $start = 4;
- $end = $page - 5;
- $ret = preg_replace("/<a\D+\d+\D+>$start<\/a>.*?<a\D+\d+\D+>$end<\/a>/", '...', $ret);
-
- $start = $page + 5;
- $end = $pages - 3;
- $ret = preg_replace("/<a\D+\d+\D+>$start<\/a>.*?<a\D+\d+\D+>$end<\/a>/", '...', $ret);
- } //if
-
- return $ret;
- } //page_numbers
-
- //calculates whether a user is allowed to see this result
- function can_display(&$user, $course_id, $group_id) {
- return true;
- } //can_display
-
- //caches the results of the last query, deletes the previous one also
- function cache($id=false, &$object=false) {
- //see if there was a previous query
- $last_term = (isset($_SESSION['search_last_term'])) ? $_SESSION['search_last_term'] : false;
-
- //if this query is different from the last, clear out the last one
- if ($id != false and $last_term != $id) {
- unset($_SESSION[$last_term]);
- session_unregister($last_term);
- } //if
-
- //store the new query if id and object are passed in
- if ($object and $id) {
- $_SESSION['search_last_term'] = $id;
- $_SESSION[$id] = $object;
- return true;
- //otherwise return the stored results
- } else if ($id and isset($_SESSION[$id])) {
- return $_SESSION[$id];
- } //else
- } //cache
-
-
if (!$site = get_site()) {
redirect("index.php");
} //if
<div align="center">
<?php
- echo 'Searching: ';
+ print 'Searching: ';
- if ($no_index) {
- print "0";
+ if ($sq->is_valid_index()) {
+ print $sq->index_count();
} else {
- print $index->count();
+ print "0";
} //else
print ' documents.';
- if ($no_index and isadmin()) {
+ if (!$sq->is_valid_index() and isadmin()) {
print "<br><br>Admin: There appears to be no index, click <a href='indexersplash.php'>here</a> to create one.";
} //if
?>
<?php
print_simple_box_end();
- if (!empty($query_string) and !$no_index) {
+ if ($sq->is_valid()) {
print_simple_box_start('center', '50%', 'white', 10);
- search_stopwatch();
-
- //if the cache is empty
- if (!($hits = cache($query_string))) {
- $resultdocs = array();
- $resultdoc = new ResultDocument;
-
- //generate a new result-set
- $hits = $index->find(strtolower($query_string));
-
- foreach ($hits as $hit) {
- //check permissions on each result
- if (can_display($USER, $hit->course_id, $hit->group_id)) {
- $resultdoc->url = $hit->url;
- $resultdoc->title = $hit->title;
- $resultdoc->score = $hit->score;
- $resultdoc->doctype = $hit->doctype;
- $resultdoc->author = $hit->author;
-
- //and store it if it passes the test
- $resultdocs[] = clone($resultdoc);
- } //if
- } //foreach
-
- //cache the results so we don't have to compute this on every page-load
- cache($query_string, $resultdocs);
-
- //print "Using new results.";
- } else {
- //There was something in the cache, so we're using that to save time
- //print "Using cached results.";
- } //else
-
- $hit_count = count($hits);
+ search_stopwatch();
+ $hit_count = $sq->count();
print "<br>";
print "<br>";
if ($hit_count > 0) {
- if ($hit_count < $results_per_page) {
- $page_number = 1;
- } else if ($page_number > ceil($hit_count/$results_per_page)) {
- $page_number = $hit_count/$results_per_page;
- } //if
-
- $start = ($page_number - 1)*$results_per_page;
- $end = $start + $results_per_page;
-
- $page_links = page_numbers($query_string, $hit_count, $page_number, $results_per_page);
+ $page_links = $sq->page_numbers();
+ $hits = $sq->results();
print "<ol>";
- for ($i = $start; $i < $end; $i++) {
- if ($i >= $hit_count) {
- break;
- } //if
-
- $listing = $hits[$i];
-
- print "<li value='".($i+1)."'><a href='".$listing->url."'>$listing->title</a><br>\n"
+ foreach ($hits as $listing) {
+ print "<li value='".($listing->number+1)."'><a href='".$listing->url."'>$listing->title</a><br>\n"
."<em>".search_shorten_url($listing->url, 70)."</em><br>\n"
."Type: ".$listing->doctype.", score: ".round($listing->score, 3).", author: ".$listing->author."<br>\n"
."<br></li>\n";
} //if
print_simple_box_end();
- } //if
-
- if (!empty($query_string) and !$no_index) {
?>
<div align="center">
</div>
<?php
- } //if
+ } //if (sq is valid)
print_simple_box_end();
print_footer();
<?php
/* Prints some basic statistics about the current index.
- * Allows the administrator to create an index if none exists.
+ *
+ * Does some diagnostics if you are logged in as an administrator.
* */
require_once('../config.php');
require_once("$CFG->dirroot/search/lib.php");
//check for php5, but don't die yet
- if ($check = search_check_php5()) {
- //filesystem stats
- $index_path = "$CFG->dataroot/search";
- $index_size = display_size(get_directory_size($index_path));
- $index_dir = get_directory_list($index_path, '', false, false);
- $index_filecount = count($index_dir);
+ if ($check = search_check_php5()) {
+ require_once("$CFG->dirroot/search/indexlib.php");
- //indexed documents stats (via db)
- $db_exists = false;
- $admin_tables = $db->MetaTables();
-
- if (in_array($CFG->prefix.'search_documents', $admin_tables)) {
- $db_exists = true;
- $types = search_get_document_types();
- sort($types);
-
- //total documents
- $type_counts['Total'] = count_records('search_documents');
-
- foreach($types as $type) {
- $c = count_records('search_documents', 'doctype', $type);
- $type_counts[$type] = (int)$c;
- } //foreach
- } else {
- $type_counts['Total'] = 0;
- } //else
- } //if
+ $indexinfo = new IndexInfo();
+ } //if
if (!$site = get_site()) {
redirect("index.php");
$admin_table->cellspacing = 0;
$admin_table->width = '500';
- $admin_table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$index_path.'</strong></em>');
- $admin_table->data[] = array('Files in index directory', $index_filecount);
- $admin_table->data[] = array('Total size', $index_size);
+ $admin_table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$indexinfo->path.'</strong></em>');
+ $admin_table->data[] = array('Files in index directory', $indexinfo->filecount);
+ $admin_table->data[] = array('Total size', $indexinfo->size);
+
+ if ($indexinfo->time > 0) {
+ $admin_table->data[] = array('Created on', date('r', $indexinfo->time));
+ } else {
+ $admin_table->data[] = array('Created on', '-');
+ } //else
+
+ if (!$indexinfo->valid($errors)) {
+ $admin_table->data[] = array('<strong>Errors</strong>', ' ');
- if ($index_filecount == 0 or !$db_exists) {
- $admin_table->data[] = array('Click to create index', "<a href='indexersplash.php'>Indexer</a>");
+ foreach ($errors as $key=>$value) {
+ $admin_table->data[] = array($key.' ... ', $value);
+ } //foreach
+
+ $admin_table->data[] = array('<strong>Solutions</strong>', ' ');
+
+ if (isset($errors['dir'])) {
+ $admin_table->data[] = array('Check dir', 'Ensure the data directory exists and is writable.');
+ } //if
+
+ if (isset($errors['db'])) {
+ $admin_table->data[] = array('Check DB', 'Check your database for any problems.');
+ } //if
+
+ $admin_table->data[] = array('Run indexer test', '<a href=\'tests/index.php\'>tests/index.php</a>');
+ $admin_table->data[] = array('Run indexer', '<a href=\'indexersplash.php\'>indexersplash.php</a>');
} //if
} //if
$table->cellspacing = 0;
$table->width = '500';
- $table->data[] = array('<strong>Database</strong>', '<em><strong>search_documents<strong></em>');
- foreach($type_counts as $key => $value) {
- $table->data[] = array($key, $value);
+ $table->data[] = array('<strong>Database</strong>', '<em><strong>search_documents<strong></em>');
+
+ //add an extra field if we're admin
+ if (isadmin()) {
+ //don't want to confuse users if the two totals don't match (hint: they should)
+ $table->data[] = array('Documents in index', $indexinfo->indexcount);
+ } //if
+
+ $table->data[] = array('Documents in database', $indexinfo->dbcount);
+
+ foreach($indexinfo->types as $key => $value) {
+ $table->data[] = array("'$key' documents", $value);
} //foreach
if (isadmin()) {
if (file_exists($class_file)) {
include_once($class_file);
- if (!defined('SEARCH_'.strtoupper($mod->name).'_TYPE')) {
- mtrace("ERROR: Constant 'SEARCH_".strtoupper($mod->name)."_TYPE' is not defined in /search/lib.php");
+ if (!defined('SEARCH_TYPE_'.strtoupper($mod->name))) {
+ mtrace("ERROR: Constant 'SEARCH_TYPE_".strtoupper($mod->name)."' is not defined in /search/lib.php");
continue;
} //if