From: skodak Date: Mon, 24 Dec 2007 21:08:39 +0000 (+0000) Subject: MDL-12724 import of html purifier 2.1.3; merged from MOODLE_19_STABLE X-Git-Url: http://git.mjollnir.org/gw?a=commitdiff_plain;h=ba7d966bae7240247d71bdb435440459b79a7abb;p=moodle.git MDL-12724 import of html purifier 2.1.3; merged from MOODLE_19_STABLE --- diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php index 677c1e3995..e9dfe5f404 100644 --- a/lib/htmlpurifier/HTMLPurifier.php +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -22,8 +22,8 @@ */ /* - HTML Purifier 2.1.2 - Standards Compliant HTML Filtering - Copyright (C) 2006 Edward Z. Yang + HTML Purifier 2.1.3 - Standards Compliant HTML Filtering + Copyright (C) 2006-2007 Edward Z. Yang This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -43,9 +43,8 @@ // constants are slow, but we'll make one exception define('HTMLPURIFIER_PREFIX', dirname(__FILE__)); -// almost every class has an undocumented dependency to these, so make sure -// they get included -require_once 'HTMLPurifier/ConfigSchema.php'; // important +// every class has an undocumented dependency to these, must be included! +require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Context.php'; @@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php'; HTMLPurifier_ConfigSchema::define( 'Core', 'CollectErrors', false, 'bool', ' Whether or not to collect errors found while filtering the document. This -is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED. -This directive has been available since 2.0.0. +is a useful way to give feedback to your users. Warning: +Currently this feature is very patchy and experimental, with lots of +possible error messages not yet implemented. It will not cause any problems, +but it may not help your users either. This directive has been available +since 2.0.0. '); /** - * Main library execution class. + * Facade that coordinates HTML Purifier's subsystems in order to purify HTML. * - * Facade that performs calls to the HTMLPurifier_Lexer, - * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to - * purify HTML. + * @note There are several points in which configuration can be specified + * for HTML Purifier. The precedence of these (from lowest to + * highest) is as follows: + * -# Instance: new HTMLPurifier($config) + * -# Invocation: purify($html, $config) + * These configurations are entirely independent of each other and + * are *not* merged. * * @todo We need an easier way to inject strategies, it'll probably end * up getting done through config though. @@ -77,15 +83,16 @@ This directive has been available since 2.0.0. class HTMLPurifier { - var $version = '2.1.2'; + var $version = '2.1.3'; var $config; - var $filters; + var $filters = array(); var $strategy, $generator; /** - * Final HTMLPurifier_Context of last run purification. Might be an array. + * Resultant HTMLPurifier_Context of last run purification. Is an array + * of contexts if the last called method was purifyArray(). * @public */ var $context; @@ -150,6 +157,11 @@ class HTMLPurifier $context->register('ErrorCollector', $error_collector); } + // setup id_accumulator context, necessary due to the fact that + // AttrValidator can be called from many places + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); for ($i = 0, $size = count($this->filters); $i < $size; $i++) { @@ -198,6 +210,8 @@ class HTMLPurifier /** * Singleton for enforcing just one HTML Purifier in your system + * @param $prototype Optional prototype HTMLPurifier instance to + * overload singleton with. */ function &getInstance($prototype = null) { static $htmlpurifier; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php index 365748c037..0e9a5f4739 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php @@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $result = $uri->validate($config, $context); if (!$result) break; - // chained validation + // chained filtering $uri_def =& $config->getDefinition('URI'); $result = $uri_def->filter($uri, $config, $context); if (!$result) break; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php index aaec099a6e..ababd9eae0 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php @@ -1,7 +1,6 @@ getHTMLDefinition(); $e =& $context->get('ErrorCollector', true); + // initialize IDAccumulator if necessary + $ok =& $context->get('IDAccumulator', true); + if (!$ok) { + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + } + // initialize CurrentToken if necessary $current_token =& $context->get('CurrentToken', true); if (!$current_token) $context->register('CurrentToken', $token); diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php index 779a7f06b9..e9f14edf7d 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php @@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required var $type = 'optional'; function validateChildren($tokens_of_children, $config, &$context) { $result = parent::validateChildren($tokens_of_children, $config, $context); - if ($result === false) return array(); + if ($result === false) { + if (empty($tokens_of_children)) return true; + else return array(); + } return $result; } } diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php index e04a4b0cc5..203542f0aa 100644 --- a/lib/htmlpurifier/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/HTMLPurifier/Config.php @@ -42,7 +42,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - var $version = '2.1.2'; + var $version = '2.1.3'; /** * Two-level associative array of configuration directives diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php index fe6bd14187..e13e0c62b0 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition /** * Adds a custom element to your HTML definition * @note See HTMLPurifier_HTMLModule::addElement for detailed - * parameter descriptions. + * parameter and return value descriptions. */ - function addElement($element_name, $type, $contents, $attr_collections, $attributes) { + function &addElement($element_name, $type, $contents, $attr_collections, $attributes) { $module =& $this->getAnonymousModule(); // assume that if the user is calling this, the element // is safe. This may not be a good idea - $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); + $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); + return $element; + } + + /** + * Adds a blank element to your HTML definition, for overriding + * existing behavior + * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed + * parameter and return value descriptions. + */ + function &addBlankElement($element_name) { + $module =& $this->getAnonymousModule(); + $element =& $module->addBlankElement($element_name); + return $element; } /** diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php index 386cf365a2..dcf306a019 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php @@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php'; require_once 'HTMLPurifier/AttrTransform/ImgSpace.php'; require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php'; +require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php'; + class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy { @@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends { var $name = 'Tidy_Strict'; var $defaultLevel = 'light'; + + function makeFixes() { + $r = parent::makeFixes(); + $r['blockquote#content_model_type'] = 'strictblockquote'; + return $r; + } + + var $defines_child_def = true; + function getChildDef($def) { + if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def); + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } } diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php index 74a233ff2f..3fc8616020 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php @@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php'; require_once 'HTMLPurifier/HTMLModule/Tidy.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; -require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php'; HTMLPurifier_ConfigSchema::define( @@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.0 Strict', true, array_merge($common, $xml, $non_xml), - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'), array(), '-//W3C//DTD XHTML 1.0 Strict//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' @@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.1', true, array_merge($common, $xml, array('Ruby')), - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1 + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1 array(), '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php index 525c9aa080..60715afc1e 100644 --- a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php +++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php @@ -1,11 +1,15 @@ load($config->get('Attr', 'IDBlacklist')); + return $id_accumulator; + } + /** * Add an ID to the lookup table. * @param $id ID to be added. diff --git a/lib/htmlpurifier/HTMLPurifier/Injector.php b/lib/htmlpurifier/HTMLPurifier/Injector.php index 5901716387..3b84709767 100644 --- a/lib/htmlpurifier/HTMLPurifier/Injector.php +++ b/lib/htmlpurifier/HTMLPurifier/Injector.php @@ -4,6 +4,9 @@ * Injects tokens into the document while parsing for well-formedness. * This enables "formatter-like" functionality such as auto-paragraphing, * smiley-ification and linkification to take place. + * + * @todo Allow injectors to request a re-run on their output. This + * would help if an operation is recursive. */ class HTMLPurifier_Injector { @@ -107,5 +110,12 @@ class HTMLPurifier_Injector */ function handleElement(&$token) {} + /** + * Notifier that is called when an end token is processed + * @note This differs from handlers in that the token is read-only + */ + function notifyEnd($token) {} + + } diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php index 6e0a6a3ed5..56a6a26878 100644 --- a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php +++ b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php @@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define( 'AutoFormat', 'AutoParagraph', false, 'bool', '

This directive turns on auto-paragraphing, where double newlines are - converted in to paragraphs whenever possible. Auto-paragraphing - applies when: + converted in to paragraphs whenever possible. Auto-paragraphing:

p tags must be allowed for this directive to take effect. We do not use br tags for paragraphing, as that is semantically incorrect.

+

+ To prevent auto-paragraphing as a content-producer, refrain from using + double-newlines except to specify a new paragraph or in contexts where + it has special meaning (whitespace usually has no meaning except in + tags like pre, so this should not be difficult.) To prevent + the paragraphing of inline text adjacent to block elements, wrap them + in div tags (the behavior is slightly different outside of + the root node.) +

This directive has been available since 2.0.1.

@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector $ok = false; // test if up-coming tokens are either block or have // a double newline in them + $nesting = 0; for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { if ($this->inputTokens[$i]->type == 'start'){ if (!$this->_isInline($this->inputTokens[$i])) { - $ok = true; + // we haven't found a double-newline, and + // we've hit a block element, so don't paragraph + $ok = false; + break; } - break; + $nesting++; + } + if ($this->inputTokens[$i]->type == 'end') { + if ($nesting <= 0) break; + $nesting--; } - if ($this->inputTokens[$i]->type == 'end') break; if ($this->inputTokens[$i]->type == 'text') { + // found it! if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; + break; } - if (!$this->inputTokens[$i]->is_whitespace) break; } } if ($ok) { diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer.php b/lib/htmlpurifier/HTMLPurifier/Lexer.php index 78abebd07b..22ef1d6dd0 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php @@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) { } HTMLPurifier_ConfigSchema::define( - 'Core', 'AcceptFullDocuments', true, 'bool', - 'This parameter determines whether or not the filter should accept full '. - 'HTML documents, not just HTML fragments. When on, it will '. - 'drop all sections except the content between body.' -); + 'Core', 'ConvertDocumentToFragment', true, 'bool', ' +This parameter determines whether or not the filter should convert +input that is a full document with html and body tags to a fragment +of just the contents of a body tag. This parameter is simply something +HTML Purifier can do during an edge-case: for most inputs, this +processing is not necessary. +'); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment'); HTMLPurifier_ConfigSchema::define( 'Core', 'LexerImpl', null, 'mixed/null', ' @@ -316,7 +319,7 @@ class HTMLPurifier_Lexer function normalize($html, $config, &$context) { // extract body from document if applicable - if ($config->get('Core', 'AcceptFullDocuments')) { + if ($config->get('Core', 'ConvertDocumentToFragment')) { $html = $this->extractBody($html); } diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php index b3639916d1..86c0a2112b 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php @@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $segment = substr($html, $cursor, $strlen_segment); + if ($segment === false) { + // somehow, we attempted to access beyond the end of + // the string, defense-in-depth, reported by Nate Abele + break; + } + // Check if it's a comment if ( - substr($segment, 0, 3) == '!--' + substr($segment, 0, 3) === '!--' ) { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); @@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // trailing slash. Remember, we could have a tag like
, so // any later token processing scripts must convert improperly // classified EmptyTags from StartTags. - $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1); + $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); if ($is_self_closing) { $strlen_segment--; $segment = substr($segment, 0, $strlen_segment); diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php b/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php index 5720c33a96..b676237914 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php @@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex { } -// begin PHP5P source code here - /* Copyright 2007 Jeroen van der Meer @@ -3722,7 +3720,7 @@ class HTML5TreeConstructer { } } - private function generateImpliedEndTags(array $exclude = array()) { + private function generateImpliedEndTags($exclude = array()) { /* When the steps below require the UA to generate implied end tags, then, if the current node is a dd element, a dt element, an li element, a p element, a td element, a th element, or a tr element, the UA must @@ -3736,7 +3734,8 @@ class HTML5TreeConstructer { } } - private function getElementCategory($name) { + private function getElementCategory($node) { + $name = $node->tagName; if(in_array($name, $this->special)) return self::SPECIAL; @@ -3884,3 +3883,4 @@ class HTML5TreeConstructer { return $this->dom; } } +?> diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php index 51a14a78f4..25e9f8acbc 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php @@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy //################################################################// // Process result by interpreting $result - if ($result === true) { + if ($result === true || $child_tokens === $result) { // leave the node as is // register start token as a parental node start diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php index b3e8aa7453..4b6f498f67 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php @@ -36,27 +36,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $definition = $config->getHTMLDefinition(); - // CurrentNesting + // local variables + $result = array(); + $generator = new HTMLPurifier_Generator(); + $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + $e =& $context->get('ErrorCollector', true); + + // member variables $this->currentNesting = array(); - $context->register('CurrentNesting', $this->currentNesting); + $this->inputIndex = false; + $this->inputTokens =& $tokens; + $this->outputTokens =& $result; - // InputIndex - $this->inputIndex = false; + // context variables + $context->register('CurrentNesting', $this->currentNesting); $context->register('InputIndex', $this->inputIndex); - - // InputTokens $context->register('InputTokens', $tokens); - $this->inputTokens =& $tokens; - - // OutputTokens - $result = array(); - $this->outputTokens =& $result; - - // %Core.EscapeInvalidTags - $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); - $generator = new HTMLPurifier_Generator(); - - $e =& $context->get('ErrorCollector', true); // -- begin INJECTOR -- @@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING); } + // warning: most foreach loops follow the convention $i => $x. + // be sure, for PHP4 compatibility, to only perform write operations + // directly referencing the object using $i: $x is only safe for reads + // -- end INJECTOR -- $token = false; @@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; + //printTokens($tokens, $this->inputIndex); + foreach ($this->injectors as $i => $x) { if ($x->skip > 0) $this->injectors[$i]->skip--; } @@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($token->type === 'text') { // injector handler code; duplicated for performance reasons foreach ($this->injectors as $i => $x) { - if (!$x->skip) $x->handleText($token); + if (!$x->skip) $this->injectors[$i]->handleText($token); if (is_array($token)) { $this->currentInjector = $i; break; @@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // injector handler code; duplicated for performance reasons if ($ok) { foreach ($this->injectors as $i => $x) { - if (!$x->skip) $x->handleElement($token); + if (!$x->skip) $this->injectors[$i]->handleElement($token); if (is_array($token)) { $this->currentInjector = $i; break; @@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $current_parent = array_pop($this->currentNesting); if ($current_parent->name == $token->name) { $result[] = $token; + foreach ($this->injectors as $i => $x) { + $this->injectors[$i]->notifyEnd($token); + } continue; } @@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // okay, we found it, close all the skipped tags // note that skipped tags contains the element we need closed - $size = count($skipped_tags); - for ($i = $size - 1; $i > 0; $i--) { - if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { + for ($i = count($skipped_tags) - 1; $i >= 0; $i--) { + if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); } - $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + foreach ($this->injectors as $j => $x) { // $j, not $i!!! + $this->injectors[$j]->notifyEnd($new_token); + } } - $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); - } $context->destroy('CurrentNesting'); @@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $context->destroy('InputIndex'); $context->destroy('CurrentToken'); - // we're at the end now, fix all still unclosed tags - // not using processToken() because at this point we don't - // care about current nesting + // we're at the end now, fix all still unclosed tags (this is + // duplicated from the end of the loop with some slight modifications) + // not using $skipped_tags since it would invariably be all of them if (!empty($this->currentNesting)) { - $size = count($this->currentNesting); - for ($i = $size - 1; $i >= 0; $i--) { + for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) { if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); } - $result[] = - new HTMLPurifier_Token_End($this->currentNesting[$i]->name); + $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); + foreach ($this->injectors as $j => $x) { // $j, not $i!!! + $this->injectors[$j]->notifyEnd($new_token); + } } } @@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // adjust the injector skips based on the array substitution if ($this->injectors) { - $offset = count($token) + 1; + $offset = count($token); for ($i = 0; $i <= $this->currentInjector; $i++) { + // because of the skip back, we need to add one more + // for uninitialized injectors. I'm not exactly + // sure why this is the case, but I think it has to + // do with the fact that we're decrementing skips + // before re-checking text + if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++; $this->injectors[$i]->skip += $offset; } } diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php index 2c280b23d7..5d26e4f570 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // mostly everything's good, but // we need to make sure required attributes are in order if ( + ($token->type === 'start' || $token->type === 'empty') && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img) // ensure config option still works ) { @@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $token->armor['ValidateAttributes'] = true; } - // CAN BE GENERICIZED if (isset($hidden_elements[$token->name]) && $token->type == 'start') { $textify_comments = $token->name; } elseif ($token->name === $textify_comments && $token->type == 'end') { diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php index 869f3fab93..6debcc336b 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php @@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php'; require_once 'HTMLPurifier/AttrValidator.php'; -HTMLPurifier_ConfigSchema::define( - 'Attr', 'IDBlacklist', array(), 'list', - 'Array of IDs not allowed in the document.'); - /** * Validate all attributes in the tokens. */ @@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy function execute($tokens, $config, &$context) { - // setup id_accumulator context - $id_accumulator = new HTMLPurifier_IDAccumulator(); - $id_accumulator->load($config->get('Attr', 'IDBlacklist')); - $context->register('IDAccumulator', $id_accumulator); - // setup validator $validator = new HTMLPurifier_AttrValidator(); @@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy $tokens[$key] = $token; // for PHP 4 } - - $context->destroy('IDAccumulator'); $context->destroy('CurrentToken'); return $tokens; diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/HTMLPurifier/URIFilter.php index e0066f3bf0..ca000ea5a2 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIFilter.php +++ b/lib/htmlpurifier/HTMLPurifier/URIFilter.php @@ -1,10 +1,22 @@ host)) return true; $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) { + // scheme not recognized + return false; + } if (!$scheme_obj->hierarchical) { // non-hierarchal URI with explicit scheme, don't change return true; diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt index c3c426aef2..82b82482b5 100644 --- a/lib/htmlpurifier/readme_moodle.txt +++ b/lib/htmlpurifier/readme_moodle.txt @@ -1,9 +1,9 @@ -Description of HTML Purifier v2.1.2 Lite library import into Moodle +Description of HTML Purifier v2.1.3 Lite library import into Moodle Changes: * HMLTModule/Text.php - added , , and tags * HMLTModule/XMLCommonAttributes.php - remove xml:lang - needed for multilang - * AttrDef/Lang.php - relaxt lang check - needed for multilang + * AttrDef/Lang.php - relax lang check - needed for multilang skodak