//== ==
//=====================================================================================
+ /// This is the class used to split, in first instance, the monolithic moodle.xml into
+ /// smaller xml files allowing the MoodleParser later to process only the required info
+ /// based in each TODO, instead of processing the whole xml for each TODO. In theory
+ /// processing time can be reduced upto 1/20th of original time (depending of the
+ /// number of TODOs in the original moodle.xml file)
+ ///
+ /// Anyway, note it's a general splitter parser, and only needs to be instantiated
+ /// with the proper destination dir and the tosplit configuration. Be careful when
+ /// using it because it doesn't support XML attributes nor real cdata out from tags.
+ /// (both not used in the target Moodle backup files)
+
+ class moodle_splitter_parser {
+ var $level = 0; /// Level we are
+ var $tree = array(); /// Array of levels we are
+ var $cdata = ''; /// Raw storage for character data
+ var $content = ''; /// Content buffer to be printed to file
+ var $trailing= ''; /// Content of the trailing tree for each splited file
+ var $savepath = null; /// Path to store splited files
+ var $fhandler = null; /// Current file we are writing to
+ var $tosplit = array(); /// Array defining the files we want to split, in this format:
+ /// array( level/tag/level/tag => filename)
+ var $splitwords = array(); /// Denormalised array containing the potential tags
+ /// being a split point. To speed up check_split_point()
+ var $maxsplitlevel = 0; /// Precalculated max level where any split happens. To speed up check_split_point()
+ var $buffersize = 65536; /// 64KB is a good write buffer. Don't expect big benefits by increasing this.
+ var $repectformat = false; /// With this setting enabled, the splited files will look like the original one
+ /// with all the indentations 100% copied from original (character data outer tags).
+ /// But this is a waste of time from our perspective, and splited xml files are completely
+ /// functional without that, so we disable this for production, generating a more compact
+ /// XML quicker
+
+ /// PHP4 constructor
+ function moodle_splitter_parser($savepath, $tosplit = null) {
+ return $this->__construct($savepath, $tosplit);
+ }
+
+ /// PHP5 constructor
+ function __construct($savepath, $tosplit = null) {
+ $this->savepath = $savepath;
+ if (!empty($tosplit)) {
+ $this->tosplit = $tosplit;
+ } else { /// No tosplit list passed, process all the possible parts in one moodle.xml file
+ $this->tosplit = array(
+ '1/MOODLE_BACKUP/2/INFO' => 'split_info.xml',
+ '1/MOODLE_BACKUP/2/ROLES' => 'split_roles.xml',
+ '2/COURSE/3/HEADER' => 'split_course_header.xml',
+ '2/COURSE/3/BLOCKS' => 'split_blocks.xml',
+ '2/COURSE/3/SECTIONS' => 'split_sections.xml',
+ '2/COURSE/3/FORMATDATA' => 'split_formatdata.xml',
+ '2/COURSE/3/METACOURSE' => 'split_metacourse.xml',
+ '2/COURSE/3/GRADEBOOK' => 'split_gradebook.xml',
+ '2/COURSE/3/USERS' => 'split_users.xml',
+ '2/COURSE/3/MESSAGES' => 'split_messages.xml',
+ '2/COURSE/3/BLOGS' => 'split_blogs.xml',
+ '2/COURSE/3/QUESTION_CATEGORIES'=> 'split_questions.xml',
+ '2/COURSE/3/SCALES' => 'split_scales.xml',
+ '2/COURSE/3/GROUPS' => 'split_groups.xml',
+ '2/COURSE/3/GROUPINGS' => 'split_groupings.xml',
+ '2/COURSE/3/GROUPINGSGROUPS' => 'split_groupingsgroups.xml',
+ '2/COURSE/3/EVENTS' => 'split_events.xml',
+ '2/COURSE/3/MODULES' => 'split_modules.xml',
+ '2/COURSE/3/LOGS' => 'split_logs.xml'
+ );
+ }
+ /// Precalculate some info used to speedup checks
+ foreach ($this->tosplit as $key=>$value) {
+ $this->splitwords[basename($key)] = true;
+ if (((int) basename(dirname($key))) > $this->maxsplitlevel) {
+ $this->maxsplitlevel = (int) basename(dirname($key));
+ }
+ }
+ }
+
+ /// Given one tag being opened, check if it's one split point.
+ /// Return false or split filename
+ function check_split_point($tag) {
+ /// Quick check. Level < 2 cannot be a split point
+ if ($this->level < 2) {
+ return false;
+ }
+ /// Quick check. Current tag against potential splitwords
+ if (!isset($this->splitwords[$tag])) {
+ return false;
+ }
+ /// Prev test passed, take a look to 2-level tosplit
+ $keytocheck = ($this->level - 1) . '/' . $this->tree[$this->level - 1] . '/' . $this->level . '/' . $this->tree[$this->level];
+ if (!isset($this->tosplit[$keytocheck])) {
+ return false;
+ }
+ /// Prev test passed, we are in a split point, return new filename
+ return $this->tosplit[$keytocheck];
+ }
+
+ /// To append data (xml-escaped) to contents buffer
+ function character_data($parser, $data) {
+
+ ///$this->content .= preg_replace($this->entity_find, $this->entity_replace, $data); ///40% slower
+ ///$this->content .= str_replace($this->entity_find, $this->entity_replace, $data); ///25% slower
+ ///$this->content .= htmlspecialchars($data); ///the best
+ /// Instead of htmlspecialchars() each chunk of character data, we are going to
+ /// concat it without transformation and will apply the htmlspecialchars() when
+ /// that character data is, efectively, going to be added to contents buffer. This
+ /// makes the number of transformations to be reduced (speedup) and avoid potential
+ /// problems with transformations being applied "in the middle" of multibyte chars.
+ $this->cdata .= $data;
+ }
+
+ /// To detect start of tags, keeping level, tree and fhandle updated.
+ /// Also handles creation of split files
+ function start_tag($parser, $tag, $attrs) {
+
+ /// Update things before processing
+ $this->level++;
+ $this->tree[$this->level] = $tag;
+
+ /// Check if we need to start a new split file,
+ /// Speedup: we only do that if we haven't a fhandler and if level <= $maxsplitlevel
+ if ($this->level <= $this->maxsplitlevel && !$this->fhandler && $newfilename = $this->check_split_point($tag)) {
+ /// Open new file handler, init everything
+ $this->fhandler = fopen($this->savepath . '/' . $newfilename, 'w');
+ $this->content = '';
+ $this->cdata = '';
+ $this->trailing = '';
+ /// Build the original leading tree (and calculate the original trailing one)
+ for ($l = 1; $l < $this->level; $l++) {
+ $this->content .= "<{$this->tree[$l]}>\n";
+ $this->trailing = "\n</{$this->tree[$l]}>" . $this->trailing;
+ }
+ }
+ /// Perform xml-entities transformation and add to contents buffer together with opening tag.
+ /// Speedup. We lose nice formatting of the split XML but avoid 50% of transformations and XML is 100% equivalent
+ $this->content .= ($this->repectformat ? htmlspecialchars($this->cdata) : '') . "<$tag>";
+ $this->cdata = '';
+ }
+
+ /// To detect end of tags, keeping level, tree and fhandle updated, writting contents buffer to split file.
+ /// Also handles closing of split files
+ function end_tag($parser, $tag) {
+
+ /// Perform xml-entities transformation and add to contents buffer together with closing tag, repecting (or no) format
+ $this->content .= ($this->repectformat ? htmlspecialchars($this->cdata) : htmlspecialchars(trim($this->cdata))) . "</$tag>";
+ $this->cdata = '';
+
+ /// Check if we need to close current split file
+ /// Speedup: we only do that if we have a fhandler and if level <= $maxsplitlevel
+ if ($this->level <= $this->maxsplitlevel && $this->fhandler && $newfilename = $this->check_split_point($tag)) {
+ /// Write pending contents buffer before closing. It's a must
+ fwrite($this->fhandler, $this->content);
+ $this->content = "";
+ /// Write the original trailing tree for fhandler
+ fwrite($this->fhandler, $this->trailing);
+ fclose($this->fhandler);
+ $this->fhandler = null;
+ } else {
+ /// Normal write of contents (use one buffer to improve speed)
+ if ($this->fhandler && strlen($this->content) > $this->buffersize) {
+ fwrite($this->fhandler, $this->content);
+ $this->content = "";
+ }
+ }
+
+ /// Update things after processing
+ $this->tree[$this->level] = "";
+ $this->level--;
+
+ }
+ }
+
+ /// This function executes the moodle_splitter_parser, causing the monolithic moodle.xml
+ /// file to be splitted in n smaller files for better treatament by the MoodleParser in restore_read_xml()
+ function restore_split_xml ($xml_file, $preferences) {
+
+ $status = true;
+
+ $xml_parser = xml_parser_create('UTF-8');
+ $split_parser = new moodle_splitter_parser(dirname($xml_file));
+ xml_set_object($xml_parser,$split_parser);
+ xml_set_element_handler($xml_parser, 'start_tag', 'end_tag');
+ xml_set_character_data_handler($xml_parser, 'character_data');
+
+ $doteach = filesize($xml_file) / 20;
+ $fromdot = 0;
+
+ $fp = fopen($xml_file,"r")
+ or $status = false;
+ if ($status) {
+ $lasttime = time();
+ while ($data = fread($fp, 8192)) {
+ if (!defined('RESTORE_SILENTLY')) {
+ $fromdot += 8192;
+ if ($fromdot > $doteach) {
+ echo ".";
+ backup_flush(300);
+ $fromdot = 0;
+ }
+ if ((time() - $lasttime) > 10) {
+ $lasttime = time();
+ backup_flush(300);
+ }
+ }
+ xml_parse($xml_parser, $data, feof($fp))
+ or die(sprintf("XML error: %s at line %d",
+ xml_error_string(xml_get_error_code($xml_parser)),
+ xml_get_current_line_number($xml_parser)));
+ }
+ fclose($fp);
+ }
+ xml_parser_free($xml_parser);
+ return $status;
+ }
+
//This is the class used to do all the xml parse
class MoodleParser {
}
function endElementRoles($parser, $tagName) {
- //Check if we are into INFO zone
+ //Check if we are into ROLES zone
if ($this->tree[2] == "ROLES") {
if ($this->tree[3] == "ROLE") {
}
}
- //Stop parsing if todo = INFO and tagName = INFO (en of the tag, of course)
+ //Stop parsing if todo = ROLES and tagName = ROLES (en of the tag, of course)
//Speed up a lot (avoid parse all)
if ($tagName == "ROLES") {
$this->finished = true;
//This function executes the MoodleParser
function restore_read_xml ($xml_file,$todo,$preferences) {
+ global $CFG;
+
$status = true;
+ /// If enabled in the site, use split files instead of original moodle.xml file
+ /// This will speed parsing speed upto 20x.
+ if (!empty($CFG->experimentalsplitrestore)) {
+ /// Use splite file, else nothing to process (saves one full parsing for each non-existing todo)
+ $splitfile= dirname($xml_file) . '/' . strtolower('split_' . $todo . '.xml');
+ if (file_exists($splitfile)) {
+ $xml_file = $splitfile;
+ debugging("Info: todo=$todo, using split file", DEBUG_DEVELOPER);
+ } else {
+ /// For some todos, that are used in earlier restore steps (restore_precheck(), restore_form...
+ /// allow fallback to monolithic moodle.xml. Those todos are at the beggining of the xml, so
+ /// it doesn't hurts too much.
+ if ($todo == 'INFO' || $todo == 'COURSE_HEADER' || $todo == 'ROLES') {
+ debugging("Info: todo=$todo, no split file. Fallback to moodle.xml", DEBUG_DEVELOPER);
+ } else {
+ debugging("Info: todo=$todo, no split file. Parse skipped", DEBUG_DEVELOPER);
+ return true;
+ }
+ }
+ }
+
$xml_parser = xml_parser_create('UTF-8');
$moodle_parser = new MoodleParser();
$moodle_parser->todo = $todo;
if ($status) {
// MDL-9290 performance improvement on reading large xml
$lasttime = time(); // crmas
- while ($data = fread($fp, 4096) and !$moodle_parser->finished) {
+ while ($data = fread($fp, 8192) and !$moodle_parser->finished) {
if ((time() - $lasttime) > 5) {
$lasttime = time();
echo "<ul>";
}
- //Localtion of the xml file
+ //Location of the xml file
$xml_file = $CFG->dataroot."/temp/backup/".$restore->backup_unique_code."/moodle.xml";
+ //Preprocess the moodle.xml file spliting into smaller chucks (modules, users, logs...)
+ //for optimal parsing later in the restore process.
+ if (!empty($CFG->experimentalsplitrestore)) {
+ if (!defined('RESTORE_SILENTLY')) {
+ echo '<li>'.get_string('preprocessingbackupfile') . '</li>';
+ }
+ //First of all, split moodle.xml into handy files
+ if (!restore_split_xml ($xml_file, $restore)) {
+ if (!defined('RESTORE_SILENTLY')) {
+ notify("Error proccessing moodle.xml file. Process ended.");
+ } else {
+ $errorstr = "Error proccessing moodle.xml file. Process ended.";
+ }
+ return false;
+ }
+ }
+
//If we've selected to restore into new course
//create it (course)
//Saving conversion id variables into backup_tables