MDL-12724 import of html purifier 2.1.3; merged from MOODLE_19_STABLE

author skodak <skodak>

Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)

committer skodak <skodak>

Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)
author skodak <skodak>
Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)
committer skodak <skodak>
Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)
diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php

index 677c1e39952ed7649b35e07c340bbb139ef08e78..e9dfe5f4048fafcd8e2a35da1f5b58ed9aee7a65 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier.php
+++ b/lib/htmlpurifier/HTMLPurifier.php
@@ -22,8 +22,8 @@
   */
  
  /*
-    HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
-    Copyright (C) 2006 Edward Z. Yang
+    HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
+    Copyright (C) 2006-2007 Edward Z. Yang
  
      This library is free software; you can redistribute it and/or
      modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,8 @@
  // constants are slow, but we'll make one exception
  define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
  
-// almost every class has an undocumented dependency to these, so make sure
-// they get included
-require_once 'HTMLPurifier/ConfigSchema.php'; // important
+// every class has an undocumented dependency on these; they must be included!
+require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
  require_once 'HTMLPurifier/Config.php';
  require_once 'HTMLPurifier/Context.php';
  
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
  HTMLPurifier_ConfigSchema::define(
      'Core', 'CollectErrors', false, 'bool', '
  Whether or not to collect errors found while filtering the document. This
-is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
-This directive has been available since 2.0.0.
+is a useful way to give feedback to your users. <strong>Warning:</strong>
+Currently this feature is very patchy and experimental, with lots of
+possible error messages not yet implemented. It will not cause any problems,
+but it may not help your users either. This directive has been available
+since 2.0.0.
  ');
  
  /**
- * Main library execution class.
+ * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
   * 
- * Facade that performs calls to the HTMLPurifier_Lexer,
- * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
- * purify HTML.
+ * @note There are several points in which configuration can be specified 
+ *       for HTML Purifier.  The precedence of these (from lowest to
+ *       highest) is as follows:
+ *          -# Instance: new HTMLPurifier($config)
+ *          -# Invocation: purify($html, $config)
+ *       These configurations are entirely independent of each other and
+ *       are *not* merged.
   * 
   * @todo We need an easier way to inject strategies, it'll probably end
   *       up getting done through config though.
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
  class HTMLPurifier
  {
      
-    var $version = '2.1.2';
+    var $version = '2.1.3';
      
      var $config;
-    var $filters;
+    var $filters = array();
      
      var $strategy, $generator;
      
      /**
-     * Final HTMLPurifier_Context of last run purification. Might be an array.
+     * Resultant HTMLPurifier_Context of last run purification. Is an array
+     * of contexts if the last called method was purifyArray().
       * @public
       */
      var $context;
@@ -150,6 +157,11 @@ class HTMLPurifier
              $context->register('ErrorCollector', $error_collector);
          }
          
+        // set up id_accumulator context, necessary due to the fact that
+        // AttrValidator can be called from many places
+        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+        $context->register('IDAccumulator', $id_accumulator);
+        
          $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
          
          for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@@ -198,6 +210,8 @@ class HTMLPurifier
      
      /**
       * Singleton for enforcing just one HTML Purifier in your system
+     * @param $prototype Optional prototype HTMLPurifier instance to
+     *                   overload singleton with.
       */
      function &getInstance($prototype = null) {
          static $htmlpurifier;
diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php

index 365748c037dfb0c4ff2cde9ceb29ae2db0538f89..0e9a5f4739839ea4903907effc4861ad27244d63 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
              $result = $uri->validate($config, $context);
              if (!$result) break;
              
-            // chained validation
+            // chained filtering
              $uri_def =& $config->getDefinition('URI');
              $result = $uri_def->filter($uri, $config, $context);
              if (!$result) break;
diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php

index aaec099a6e4e097ed91d00bddbbd1b3f843daebf..ababd9eae07839424a7a92dfed1f17bde564918e 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
@@ -1,7 +1,6 @@
  <?php
  
  require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
  
  class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
  {
@@ -15,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
      
  }
  
+// sub-implementations
+require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
diff --git a/lib/htmlpurifier/HTMLPurifier/AttrValidator.php b/lib/htmlpurifier/HTMLPurifier/AttrValidator.php

index f02bd2087cfcd79dbee66e46808e8b0276e4c65d..a471b0937921bd0b5ca62adc38c1f904c4856977 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/AttrValidator.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrValidator.php
@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
          $definition = $config->getHTMLDefinition();
          $e =& $context->get('ErrorCollector', true);
          
+        // initialize IDAccumulator if necessary
+        $ok =& $context->get('IDAccumulator', true);
+        if (!$ok) {
+            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+            $context->register('IDAccumulator', $id_accumulator);
+        }
+        
          // initialize CurrentToken if necessary
          $current_token =& $context->get('CurrentToken', true);
          if (!$current_token) $context->register('CurrentToken', $token);
diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php

index 779a7f06b9e78df8d5a296487b1dd552c21abe4e..e9f14edf7db667bca26503d41a05dbe1d7d24579 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
      var $type = 'optional';
      function validateChildren($tokens_of_children, $config, &$context) {
          $result = parent::validateChildren($tokens_of_children, $config, $context);
-        if ($result === false) return array();
+        if ($result === false) {
+            if (empty($tokens_of_children)) return true;
+            else return array();
+        }
          return $result;
      }
  }
diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php

index e04a4b0cc51ec438f37d53fba6f5756d5665b390..203542f0aa7c296b4f1481994ce33ccec1684fda 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Config.php
+++ b/lib/htmlpurifier/HTMLPurifier/Config.php
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
      /**
       * HTML Purifier's version
       */
-    var $version = '2.1.2';
+    var $version = '2.1.3';
      
      /**
       * Two-level associative array of configuration directives
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php

index fe6bd1418776b4794ff60b427144355286686936..e13e0c62b0e8183430d8f0dfeb5879270b99e8d1 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
      /**
       * Adds a custom element to your HTML definition
       * @note See HTMLPurifier_HTMLModule::addElement for detailed 
-     *       parameter descriptions.
+     *       parameter and return value descriptions.
       */
-    function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
+    function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
          $module =& $this->getAnonymousModule();
          // assume that if the user is calling this, the element
          // is safe. This may not be a good idea
-        $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+        $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+        return $element;
+    }
+    
+    /**
+     * Adds a blank element to your HTML definition, for overriding
+     * existing behavior
+     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
+     *       parameter and return value descriptions.
+     */
+    function &addBlankElement($element_name) {
+        $module  =& $this->getAnonymousModule();
+        $element =& $module->addBlankElement($element_name);
+        return $element;
      }
      
      /**
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php

index 386cf365a26494644aa6d2230d486f7c3818fc93..dcf306a0197a0c1214d74306e664432749ea771c 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
  require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
  require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
  
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
  class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
        HTMLPurifier_HTMLModule_Tidy
  {
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
  {
      var $name = 'Tidy_Strict';
      var $defaultLevel = 'light';
+    
+    function makeFixes() {
+        $r = parent::makeFixes();
+        $r['blockquote#content_model_type'] = 'strictblockquote';
+        return $r;
+    }
+    
+    var $defines_child_def = true;
+    function getChildDef($def) {
+        if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
+        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+    }
  }
  
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php

index 74a233ff2f9ed3181ba11b38556de2733ee3b84f..3fc86160201fb801a6e35d6e01ed36c4bc5ccbb4 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
  require_once 'HTMLPurifier/HTMLModule/Tidy.php';
  require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
  require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
  require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
  
  HTMLPurifier_ConfigSchema::define(
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
          $this->doctypes->register(
              'XHTML 1.0 Strict', true,
              array_merge($common, $xml, $non_xml),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
+            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
              array(),
              '-//W3C//DTD XHTML 1.0 Strict//EN',
              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
          $this->doctypes->register(
              'XHTML 1.1', true,
              array_merge($common, $xml, array('Ruby')),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
+            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
              array(),
              '-//W3C//DTD XHTML 1.1//EN',
              'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php

index 525c9aa0800fc31be92d9b72d4d39eb1a41aaed0..60715afc1ecbea36154a07d0a34ed6de363ed343 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
+++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
@@ -1,11 +1,15 @@
  <?php
  
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'IDBlacklist', array(), 'list',
+    'Array of IDs not allowed in the document.'
+);
+
  /**
   * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
   * @note In Slashdot-speak, dupe means duplicate.
- * @note This class does not accept $config or $context, thus, it is the
- *       burden of the callee to register the appropriate errors or
- *       configuration.
+ * @note The default constructor does not accept $config or $context objects:
+ *       you must use the static build() factory method to perform initialization.
   */
  class HTMLPurifier_IDAccumulator
  {
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
       */
      var $ids = array();
      
+    /**
+     * Builds an IDAccumulator, also initializing the default blacklist
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Fully initialized HTMLPurifier_IDAccumulator
+     * @static
+     */
+    function build($config, &$context) {
+        $id_accumulator = new HTMLPurifier_IDAccumulator();
+        $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
+        return $id_accumulator;
+    }
+    
      /**
       * Add an ID to the lookup table.
       * @param $id ID to be added.
diff --git a/lib/htmlpurifier/HTMLPurifier/Injector.php b/lib/htmlpurifier/HTMLPurifier/Injector.php

index 59017163877a5f8dbc14411d0c0cb2a4a7279bd2..3b847097673231353ca25c81dd3aeebe6dc2e853 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Injector.php
+++ b/lib/htmlpurifier/HTMLPurifier/Injector.php
@@ -4,6 +4,9 @@
   * Injects tokens into the document while parsing for well-formedness.
   * This enables "formatter-like" functionality such as auto-paragraphing,
   * smiley-ification and linkification to take place.
+ * 
+ * @todo Allow injectors to request a re-run on their output. This 
+ *       would help if an operation is recursive.
   */
  class HTMLPurifier_Injector
  {
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
       */
      function handleElement(&$token) {}
      
+    /**
+     * Notifier that is called when an end token is processed
+     * @note This differs from handlers in that the token is read-only
+     */
+    function notifyEnd($token) {}
+    
+    
  }
  
diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php

index 6e0a6a3ed5c7a069a5f3375bdff97a98c0ee6472..56a6a2687884caa168e57dbf962d4baa21c20395 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php
+++ b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
      'AutoFormat', 'AutoParagraph', false, 'bool', '
  <p>
    This directive turns on auto-paragraphing, where double newlines are
-  converted in to paragraphs whenever possible. Auto-paragraphing
-  applies when:
+  converted in to paragraphs whenever possible. Auto-paragraphing:
  </p>
  <ul>
-  <li>There are inline elements or text in the root node</li>
-  <li>There are inline elements or text with double newlines or
-      block elements in nodes that allow paragraph tags</li>
-  <li>There are double newlines in paragraph tags</li>
+  <li>Always applies to inline elements or text in the root node,</li>
+  <li>Applies to inline elements or text with double newlines in nodes
+      that allow paragraph tags,</li>
+  <li>Applies to double newlines in paragraph tags</li>
  </ul>
  <p>
    <code>p</code> tags must be allowed for this directive to take effect.
    We do not use <code>br</code> tags for paragraphing, as that is
    semantically incorrect.
  </p>
+<p>
+  To prevent auto-paragraphing as a content-producer, refrain from using
+  double-newlines except to specify a new paragraph or in contexts where
+  it has special meaning (whitespace usually has no meaning except in
+  tags like <code>pre</code>, so this should not be difficult.) To prevent
+  the paragraphing of inline text adjacent to block elements, wrap them
+  in <code>div</code> tags (the behavior is slightly different outside of
+  the root node.)
+</p>
  <p>
    This directive has been available since 2.0.1.
  </p>
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
                  $ok = false;
                  // test if up-coming tokens are either block or have
                  // a double newline in them
+                $nesting = 0;
                  for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
                      if ($this->inputTokens[$i]->type == 'start'){
                          if (!$this->_isInline($this->inputTokens[$i])) {
-                            $ok = true;
+                            // we haven't found a double-newline, and
+                            // we've hit a block element, so don't paragraph
+                            $ok = false;
+                            break;
                          }
-                        break;
+                        $nesting++;
+                    }
+                    if ($this->inputTokens[$i]->type == 'end') {
+                        if ($nesting <= 0) break;
+                        $nesting--;
                      }
-                    if ($this->inputTokens[$i]->type == 'end') break;
                      if ($this->inputTokens[$i]->type == 'text') {
+                        // found it!
                          if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
                              $ok = true;
+                            break;
                          }
-                        if (!$this->inputTokens[$i]->is_whitespace) break;
                      }
                  }
                  if ($ok) {
diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer.php b/lib/htmlpurifier/HTMLPurifier/Lexer.php

index 78abebd07bfbb2313cab632572c588ff4e3d6b97..22ef1d6dd0b4c027e3afbc506dcaf78bda39e9b9 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Lexer.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php
@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
  }
  
  HTMLPurifier_ConfigSchema::define(
-    'Core', 'AcceptFullDocuments', true, 'bool',
-    'This parameter determines whether or not the filter should accept full '.
-    'HTML documents, not just HTML fragments.  When on, it will '.
-    'drop all sections except the content between body.'
-);
+    'Core', 'ConvertDocumentToFragment', true, 'bool', '
+This parameter determines whether or not the filter should convert
+input that is a full document with html and body tags to a fragment
+of just the contents of a body tag. This parameter is simply something
+HTML Purifier can do during an edge-case: for most inputs, this
+processing is not necessary.
+');
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
  
  HTMLPurifier_ConfigSchema::define(
      'Core', 'LexerImpl', null, 'mixed/null', '
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
      function normalize($html, $config, &$context) {
          
          // extract body from document if applicable
-        if ($config->get('Core', 'AcceptFullDocuments')) {
+        if ($config->get('Core', 'ConvertDocumentToFragment')) {
              $html = $this->extractBody($html);
          }
          
diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php

index b3639916d1debae688e85932766269561c6e1d53..86c0a2112b09a01528eeeba018afef4666996f9d 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                  
                  $segment = substr($html, $cursor, $strlen_segment);
                  
+                if ($segment === false) {
+                    // somehow, we attempted to access beyond the end of
+                    // the string, defense-in-depth, reported by Nate Abele
+                    break;
+                }
+                
                  // Check if it's a comment
                  if (
-                    substr($segment, 0, 3) == '!--'
+                    substr($segment, 0, 3) === '!--'
                  ) {
                      // re-determine segment length, looking for -->
                      $position_comment_end = strpos($html, '-->', $cursor);
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                  // trailing slash. Remember, we could have a tag like <br>, so
                  // any later token processing scripts must convert improperly
                  // classified EmptyTags from StartTags.
-                $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
+                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
                  if ($is_self_closing) {
                      $strlen_segment--;
                      $segment = substr($segment, 0, $strlen_segment);
diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php b/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php

index 5720c33a96b8e016c735cf21bb370c45797ae0d4..b6762379141b156905ad8c2bba70f677051edbc2 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php
@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
      \r
  }\r
  \r
-// begin PHP5P source code here\r
-\r
  /*\r
  \r
  Copyright 2007 Jeroen van der Meer <http://jero.net/> \r
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
          }\r
      }\r
  \r
-    private function generateImpliedEndTags(array $exclude = array()) {\r
+    private function generateImpliedEndTags($exclude = array()) {\r
          /* When the steps below require the UA to generate implied end tags,\r
          then, if the current node is a dd element, a dt element, an li element,\r
          a p element, a td element, a th  element, or a tr element, the UA must\r
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
          }\r
      }\r
  \r
-    private function getElementCategory($name) {\r
+    private function getElementCategory($node) {\r
+        $name = $node->tagName;\r
          if(in_array($name, $this->special))\r
              return self::SPECIAL;\r
  \r
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
          return $this->dom;\r
      }\r
  }\r
+?>\r
diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php

index 51a14a78f45f6f32fede1d47b8809bf03ff1f838..25e9f8acbca816ecad32b926d3f55bbc749387b1 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php
+++ b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php
@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
              //################################################################//
              // Process result by interpreting $result
              
-            if ($result === true) {
+            if ($result === true || $child_tokens === $result) {
                  // leave the node as is
                  
                  // register start token as a parental node start
diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php

index b3e8aa74532394569ca59894033f89b98dfe4e70..4b6f498f67307026cc26d0806fbcc229e9b30526 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -36,27 +36,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
          
          $definition = $config->getHTMLDefinition();
          
-        // CurrentNesting
+        // local variables
+        $result = array();
+        $generator = new HTMLPurifier_Generator();
+        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
+        $e =& $context->get('ErrorCollector', true);
+        
+        // member variables
          $this->currentNesting = array();
-        $context->register('CurrentNesting', $this->currentNesting);
+        $this->inputIndex     = false;
+        $this->inputTokens    =& $tokens;
+        $this->outputTokens   =& $result;
          
-        // InputIndex
-        $this->inputIndex = false;
+        // context variables
+        $context->register('CurrentNesting', $this->currentNesting);
          $context->register('InputIndex', $this->inputIndex);
-        
-        // InputTokens
          $context->register('InputTokens', $tokens);
-        $this->inputTokens =& $tokens;
-        
-        // OutputTokens
-        $result = array();
-        $this->outputTokens =& $result;
-        
-        // %Core.EscapeInvalidTags
-        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
-        $generator = new HTMLPurifier_Generator();
-        
-        $e =& $context->get('ErrorCollector', true);
          
          // -- begin INJECTOR --
          
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
          }
          
+        // warning: most foreach loops follow the convention $i => $x.
+        // be sure, for PHP4 compatibility, to only perform write operations
+        // directly referencing the object using $i: $x is only safe for reads
+        
          // -- end INJECTOR --
          
          $token = false;
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              // if all goes well, this token will be passed through unharmed
              $token = $tokens[$this->inputIndex];
              
+            //printTokens($tokens, $this->inputIndex);
+            
              foreach ($this->injectors as $i => $x) {
                  if ($x->skip > 0) $this->injectors[$i]->skip--;
              }
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                  if ($token->type === 'text') {
                       // injector handler code; duplicated for performance reasons
                       foreach ($this->injectors as $i => $x) {
-                         if (!$x->skip) $x->handleText($token);
+                         if (!$x->skip) $this->injectors[$i]->handleText($token);
                           if (is_array($token)) {
                               $this->currentInjector = $i;
                               break;
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              // injector handler code; duplicated for performance reasons
              if ($ok) {
                  foreach ($this->injectors as $i => $x) {
-                    if (!$x->skip) $x->handleElement($token);
+                    if (!$x->skip) $this->injectors[$i]->handleElement($token);
                      if (is_array($token)) {
                          $this->currentInjector = $i;
                          break;
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              $current_parent = array_pop($this->currentNesting);
              if ($current_parent->name == $token->name) {
                  $result[] = $token;
+                foreach ($this->injectors as $i => $x) {
+                    $this->injectors[$i]->notifyEnd($token);
+                }
                  continue;
              }
              
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              
              // okay, we found it, close all the skipped tags
              // note that skipped tags contains the element we need closed
-            $size = count($skipped_tags);
-            for ($i = $size - 1; $i > 0; $i--) {
-                if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
+                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                  }
-                $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+                    $this->injectors[$j]->notifyEnd($new_token);
+                }
              }
              
-            $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
-            
          }
          
          $context->destroy('CurrentNesting');
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
          $context->destroy('InputIndex');
          $context->destroy('CurrentToken');
          
-        // we're at the end now, fix all still unclosed tags
-        // not using processToken() because at this point we don't
-        // care about current nesting
+        // we're at the end now, fix all still unclosed tags (this is
+        // duplicated from the end of the loop with some slight modifications)
+        // not using $skipped_tags since it would invariably be all of them
          if (!empty($this->currentNesting)) {
-            $size = count($this->currentNesting);
-            for ($i = $size - 1; $i >= 0; $i--) {
+            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                  if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                  }
-                $result[] =
-                    new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+                    $this->injectors[$j]->notifyEnd($new_token);
+                }
              }
          }
          
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
              
              // adjust the injector skips based on the array substitution
              if ($this->injectors) {
-                $offset = count($token) + 1;
+                $offset = count($token);
                  for ($i = 0; $i <= $this->currentInjector; $i++) {
+                    // because of the skip back, we need to add one more
+                    // for uninitialized injectors. I'm not exactly
+                    // sure why this is the case, but I think it has to
+                    // do with the fact that we're decrementing skips
+                    // before re-checking text
+                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                      $this->injectors[$i]->skip += $offset;
                  }
              }
diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php

index 2c280b23d75e83993975c6a89b7aaead723dfbf9..5d26e4f57056c9705985c0e8f8c18119ba431abd 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                      // mostly everything's good, but
                      // we need to make sure required attributes are in order
                      if (
+                        ($token->type === 'start' || $token->type === 'empty') &&
                          $definition->info[$token->name]->required_attr &&
                          ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                      ) {
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                          $token->armor['ValidateAttributes'] = true;
                      }
                      
-                    // CAN BE GENERICIZED
                      if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
                          $textify_comments = $token->name;
                      } elseif ($token->name === $textify_comments && $token->type == 'end') {
diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php

index 869f3fab932000e09e508de4ee4ee4b8a567aefc..6debcc336bd2748a40a11cf7d08287c24dfaa49e 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
  
  require_once 'HTMLPurifier/AttrValidator.php';
  
-HTMLPurifier_ConfigSchema::define(
-    'Attr', 'IDBlacklist', array(), 'list',
-    'Array of IDs not allowed in the document.');
-
  /**
   * Validate all attributes in the tokens.
   */
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
      
      function execute($tokens, $config, &$context) {
          
-        // setup id_accumulator context
-        $id_accumulator = new HTMLPurifier_IDAccumulator();
-        $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
-        $context->register('IDAccumulator', $id_accumulator);
-        
          // setup validator
          $validator = new HTMLPurifier_AttrValidator();
          
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
              
              $tokens[$key] = $token; // for PHP 4
          }
-        
-        $context->destroy('IDAccumulator');
          $context->destroy('CurrentToken');
          
          return $tokens;
diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/HTMLPurifier/URIFilter.php

index e0066f3bf04f9b624f2146d7cb4f23979b0c9d56..ca000ea5a2a8d6d29a1089985a6494d2dd5e8e42 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/URIFilter.php
+++ b/lib/htmlpurifier/HTMLPurifier/URIFilter.php
@@ -1,10 +1,22 @@
  <?php
  
  /**
- * Chainable filters for custom URI processing 
+ * Chainable filters for custom URI processing.
+ * 
+ * These filters can perform custom actions on a URI filter object,
+ * including transformation or blacklisting.
+ * 
+ * @warning This filter is called before scheme object validation occurs.
+ *          Make sure, if you require a specific scheme object, you
+ *          check that it exists. This allows filters to convert
+ *          proprietary URI schemes into regular ones.
   */
  class HTMLPurifier_URIFilter
  {
+    
+    /**
+     * Unique identifier of filter
+     */
      var $name;
      
      /**
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
       * @param &$uri Reference to URI object
       * @param $config Instance of HTMLPurifier_Config
       * @param &$context Instance of HTMLPurifier_Context
+     * @return bool Whether or not to continue processing: false indicates
+     *         URL is no good, true indicates continue processing. Note that
+     *         all changes are committed directly on the URI object
       */
      function filter(&$uri, $config, &$context) {
          trigger_error('Cannot call abstract function', E_USER_ERROR);
      }
+    
  }
diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php b/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php

index 9935dc6ee95be03a05cef0ee145c29a9296dead9..8fe4f73e61648f01c40d0c493c72eea6dea72384 100644 (file)
--- a/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php
+++ b/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
              // absolute URI already: don't change
              if (!is_null($uri->host)) return true;
              $scheme_obj = $uri->getSchemeObj($config, $context);
+            if (!$scheme_obj) {
+                // scheme not recognized
+                return false;
+            }
              if (!$scheme_obj->hierarchical) {
                  // non-hierarchical URI with explicit scheme, don't change
                  return true;
diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt

index c3c426aef2a6823df7dff18c622e39dc2ab1713c..82b82482b52463c37b76f61f3fde39ade1a78739 100644 (file)
--- a/lib/htmlpurifier/readme_moodle.txt
+++ b/lib/htmlpurifier/readme_moodle.txt
@@ -1,9 +1,9 @@
-Description of HTML Purifier v2.1.2 Lite library import into Moodle
+Description of HTML Purifier v2.1.3 Lite library import into Moodle
  
  Changes:
  * HTMLModule/Text.php - added <nolink>, <tex>, <lang> and <algebra> tags
  * HTMLModule/XMLCommonAttributes.php - remove xml:lang - needed for multilang
- * AttrDef/Lang.php - relaxt lang check - needed for multilang
+ * AttrDef/Lang.php - relax lang check - needed for multilang
  
  skodak
author	skodak <skodak>
	Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)
committer	skodak <skodak>
	Mon, 24 Dec 2007 21:08:39 +0000 (21:08 +0000)
lib/htmlpurifier/HTMLPurifier.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/AttrValidator.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Config.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/IDAccumulator.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Injector.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Lexer.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Lexer/PH5P.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/URIFilter.php		patch \| blob \| history
lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php		patch \| blob \| history
lib/htmlpurifier/readme_moodle.txt		patch \| blob \| history