From: skodak Date: Tue, 21 Aug 2007 22:06:47 +0000 (+0000) Subject: MDL-10937 upgraded html purifier to 2.1.1 X-Git-Url: http://git.mjollnir.org/gw?a=commitdiff_plain;h=5adad31057ff7ab44254cfc446add0b261a74733;p=moodle.git MDL-10937 upgraded html purifier to 2.1.1 --- diff --git a/lib/htmlpurifier/HTMLPurifier.auto.php b/lib/htmlpurifier/HTMLPurifier.auto.php index a66fd2e25d..cb6a84265d 100644 --- a/lib/htmlpurifier/HTMLPurifier.auto.php +++ b/lib/htmlpurifier/HTMLPurifier.auto.php @@ -7,4 +7,3 @@ set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() ); require_once 'HTMLPurifier.php'; -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.func.php b/lib/htmlpurifier/HTMLPurifier.func.php index 876ad7b298..b8cdacecec 100644 --- a/lib/htmlpurifier/HTMLPurifier.func.php +++ b/lib/htmlpurifier/HTMLPurifier.func.php @@ -18,4 +18,3 @@ function HTMLPurifier($html, $config = null) { return $purifier->purify($html, $config); } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php index 3d538bca1a..fa42e3bb78 100644 --- a/lib/htmlpurifier/HTMLPurifier.php +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -22,7 +22,7 @@ */ /* - HTML Purifier 1.6.1 - Standards Compliant HTML Filtering + HTML Purifier 2.1.1 - Standards Compliant HTML Filtering Copyright (C) 2006 Edward Z. 
Yang This library is free software; you can redistribute it and/or @@ -40,9 +40,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +// constants are slow, but we'll make one exception +define('HTMLPURIFIER_PREFIX', dirname(__FILE__)); + // almost every class has an undocumented dependency to these, so make sure // they get included -require_once 'HTMLPurifier/ConfigSchema.php'; +require_once 'HTMLPurifier/ConfigSchema.php'; // important require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Context.php'; @@ -51,6 +54,16 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Strategy/Core.php'; require_once 'HTMLPurifier/Encoder.php'; +require_once 'HTMLPurifier/ErrorCollector.php'; +require_once 'HTMLPurifier/LanguageFactory.php'; + +HTMLPurifier_ConfigSchema::define( + 'Core', 'CollectErrors', false, 'bool', ' +Whether or not to collect errors found while filtering the document. This +is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED. +This directive has been available since 2.0.0. +'); + /** * Main library execution class. * @@ -64,12 +77,12 @@ require_once 'HTMLPurifier/Encoder.php'; class HTMLPurifier { - var $version = '1.6.1'; + var $version = '2.1.1'; var $config; var $filters; - var $lexer, $strategy, $generator; + var $strategy, $generator; /** * Final HTMLPurifier_Context of last run purification. Might be an array. @@ -89,7 +102,6 @@ class HTMLPurifier $this->config = HTMLPurifier_Config::create($config); - $this->lexer = HTMLPurifier_Lexer::create(); $this->strategy = new HTMLPurifier_Strategy_Core(); $this->generator = new HTMLPurifier_Generator(); @@ -117,7 +129,27 @@ class HTMLPurifier $config = $config ? 
HTMLPurifier_Config::create($config) : $this->config; + // implementation is partially environment dependant, partially + // configuration dependant + $lexer = HTMLPurifier_Lexer::create($config); + $context = new HTMLPurifier_Context(); + + // our friendly neighborhood generator, all primed with configuration too! + $this->generator->generateFromTokens(array(), $config, $context); + $context->register('Generator', $this->generator); + + // set up global context variables + if ($config->get('Core', 'CollectErrors')) { + // may get moved out if other facilities use it + $language_factory = HTMLPurifier_LanguageFactory::instance(); + $language = $language_factory->create($config, $context); + $context->register('Locale', $language); + + $error_collector = new HTMLPurifier_ErrorCollector($context); + $context->register('ErrorCollector', $error_collector); + } + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); for ($i = 0, $size = count($this->filters); $i < $size; $i++) { @@ -130,7 +162,7 @@ class HTMLPurifier // list of tokens $this->strategy->execute( // list of un-purified tokens - $this->lexer->tokenizeHTML( + $lexer->tokenizeHTML( // un-purified HTML $html, $config, $context ), @@ -164,7 +196,23 @@ class HTMLPurifier return $array_of_html; } + /** + * Singleton for enforcing just one HTML Purifier in your system + */ + function &getInstance($prototype = null) { + static $htmlpurifier; + if (!$htmlpurifier || $prototype) { + if (is_a($prototype, 'HTMLPurifier')) { + $htmlpurifier = $prototype; + } elseif ($prototype) { + $htmlpurifier = new HTMLPurifier($prototype); + } else { + $htmlpurifier = new HTMLPurifier(); + } + } + return $htmlpurifier; + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrCollections.php b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php index 8318abb15c..0aa55f128f 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrCollections.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php 
@@ -1,7 +1,6 @@ info; // load extensions from the modules foreach ($modules as $module) { foreach ($module->attr_collections as $coll_i => $coll) { + if (!isset($this->info[$coll_i])) { + $this->info[$coll_i] = array(); + } foreach ($coll as $attr_i => $attr) { - if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) { + if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { // merge in includes - $info[$coll_i][$attr_i] = array_merge( - $info[$coll_i][$attr_i], $attr); + $this->info[$coll_i][$attr_i] = array_merge( + $this->info[$coll_i][$attr_i], $attr); continue; } - $info[$coll_i][$attr_i] = $attr; + $this->info[$coll_i][$attr_i] = $attr; } } } // perform internal expansions and inclusions - foreach ($info as $name => $attr) { + foreach ($this->info as $name => $attr) { // merge attribute collections that include others - $this->performInclusions($info[$name]); + $this->performInclusions($this->info[$name]); // replace string identifiers with actual attribute objects - $this->expandIdentifiers($info[$name], $attr_types); + $this->expandIdentifiers($this->info[$name], $attr_types); } } @@ -57,16 +56,20 @@ class HTMLPurifier_AttrCollections function performInclusions(&$attr) { if (!isset($attr[0])) return; $merge = $attr[0]; + $seen = array(); // recursion guard // loop through all the inclusions for ($i = 0; isset($merge[$i]); $i++) { + if (isset($seen[$merge[$i]])) continue; + $seen[$merge[$i]] = true; // foreach attribute of the inclusion, copy it over + if (!isset($this->info[$merge[$i]])) continue; foreach ($this->info[$merge[$i]] as $key => $value) { if (isset($attr[$key])) continue; // also catches more inclusions $attr[$key] = $value; } - if (isset($info[$merge[$i]][0])) { + if (isset($this->info[$merge[$i]][0])) { // recursion - $merge = array_merge($merge, isset($info[$merge[$i]][0])); + $merge = array_merge($merge, $this->info[$merge[$i]][0]); } } unset($attr[0]); @@ -79,22 +82,48 @@ class HTMLPurifier_AttrCollections * @param $attr_types 
HTMLPurifier_AttrTypes instance */ function expandIdentifiers(&$attr, $attr_types) { + + // because foreach will process new elements we add, make sure we + // skip duplicates + $processed = array(); + foreach ($attr as $def_i => $def) { + // skip inclusions if ($def_i === 0) continue; - if (!is_string($def)) continue; + + if (isset($processed[$def_i])) continue; + + // determine whether or not attribute is required + if ($required = (strpos($def_i, '*') !== false)) { + // rename the definition + unset($attr[$def_i]); + $def_i = trim($def_i, '*'); + $attr[$def_i] = $def; + } + + $processed[$def_i] = true; + + // if we've already got a literal object, move on + if (is_object($def)) { + // preserve previous required + $attr[$def_i]->required = ($required || $attr[$def_i]->required); + continue; + } + if ($def === false) { unset($attr[$def_i]); continue; } - if (isset($attr_types->info[$def])) { - $attr[$def_i] = $attr_types->info[$def]; + + if ($t = $attr_types->get($def)) { + $attr[$def_i] = $t; + $attr[$def_i]->required = $required; } else { - trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR); unset($attr[$def_i]); } } + } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef.php b/lib/htmlpurifier/HTMLPurifier/AttrDef.php index 334a7acedd..882b626043 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef.php @@ -14,11 +14,17 @@ class HTMLPurifier_AttrDef { /** - * Tells us whether or not an HTML attribute is minimized. Only the - * boolean attribute vapourware would use this. + * Tells us whether or not an HTML attribute is minimized. Has no + * meaning in other contexts. */ var $minimized = false; + /** + * Tells us whether or not an HTML attribute is required. Has no + * meaning in other contexts + */ + var $required = false; + /** * Validates and cleans passed string according to a definition. 
* @@ -62,6 +68,19 @@ class HTMLPurifier_AttrDef $string = str_replace(array("\r", "\t"), ' ', $string); return $string; } + + /** + * Factory method for creating this class from a string. + * @param $string String construction info + * @return Created AttrDef object corresponding to $string + * @public + */ + function make($string) { + // default implementation, return flyweight of this object + // if overloaded, it is *necessary* for you to clone the + // object (usually by instantiating a new copy) and return that + return $this; + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php index 220ec0d0d1..d0f49bc4ad 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php @@ -66,4 +66,3 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php index 42d8bcf0e6..b82e98e581 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php @@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php index 77a3ddd6e3..0d10ab681d 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -127,4 +127,3 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php index 583f14fd09..f6d4d684e3 100644 --- 
a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php @@ -42,4 +42,3 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php index 4e6a78acf8..30b38f9293 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php @@ -2,43 +2,47 @@ require_once 'HTMLPurifier/AttrDef.php'; -/** - * Validates Color as defined by CSS. - */ -class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef -{ - - /** - * Color keyword lookup table. - * @todo Extend it to include all usually allowed colors. - */ - var $colors = array( +HTMLPurifier_ConfigSchema::define( + 'Core', 'ColorKeywords', array( 'maroon' => '#800000', - 'red' => '#F00', + 'red' => '#FF0000', 'orange' => '#FFA500', - 'yellow' => '#FF0', + 'yellow' => '#FFFF00', 'olive' => '#808000', 'purple' => '#800080', - 'fuchsia' => '#F0F', - 'white' => '#FFF', - 'lime' => '#0F0', + 'fuchsia' => '#FF00FF', + 'white' => '#FFFFFF', + 'lime' => '#00FF00', 'green' => '#008000', 'navy' => '#000080', - 'blue' => '#00F', - 'aqua' => '#0FF', + 'blue' => '#0000FF', + 'aqua' => '#00FFFF', 'teal' => '#008080', - 'black' => '#000', + 'black' => '#000000', 'silver' => '#C0C0C0', 'gray' => '#808080' - ); + ), 'hash', ' +Lookup array of color names to six digit hexadecimal number corresponding +to color, with preceding hash mark. Used when parsing colors. +This directive has been available since 2.0.0. +'); + +/** + * Validates Color as defined by CSS. 
+ */ +class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef +{ function validate($color, $config, &$context) { + static $colors = null; + if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); + $color = trim($color); if (!$color) return false; $lower = strtolower($color); - if (isset($this->colors[$lower])) return $this->colors[$lower]; + if (isset($colors[$lower])) return $colors[$lower]; if ($color[0] === '#') { // hexadecimal handling @@ -94,4 +98,3 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php index 9d2803d26c..44ad542153 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php @@ -35,4 +35,3 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php index 1b3b090503..6ce18efb80 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php @@ -18,18 +18,6 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef */ var $info = array(); - /** - * System font keywords. 
- */ - var $system_fonts = array( - 'caption' => true, - 'icon' => true, - 'menu' => true, - 'message-box' => true, - 'small-caption' => true, - 'status-bar' => true - ); - function HTMLPurifier_AttrDef_CSS_Font($config) { $def = $config->getCSSDefinition(); $this->info['font-style'] = $def->info['font-style']; @@ -42,13 +30,22 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef function validate($string, $config, &$context) { + static $system_fonts = array( + 'caption' => true, + 'icon' => true, + 'menu' => true, + 'message-box' => true, + 'small-caption' => true, + 'status-bar' => true + ); + // regular pre-processing $string = $this->parseCDATA($string); if ($string === '') return false; // check if it's one of the keywords $lowercase_string = strtolower($string); - if (isset($this->system_fonts[$lowercase_string])) { + if (isset($system_fonts[$lowercase_string])) { return $lowercase_string; } @@ -151,4 +148,3 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php index 15cbbf3995..dfd89b9584 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -10,19 +10,15 @@ require_once 'HTMLPurifier/AttrDef.php'; class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef { - /** - * Generic font family keywords. 
- * @protected - */ - var $generic_names = array( - 'serif' => true, - 'sans-serif' => true, - 'monospace' => true, - 'fantasy' => true, - 'cursive' => true - ); - function validate($string, $config, &$context) { + static $generic_names = array( + 'serif' => true, + 'sans-serif' => true, + 'monospace' => true, + 'fantasy' => true, + 'cursive' => true + ); + $string = $this->parseCDATA($string); // assume that no font names contain commas in them $fonts = explode(',', $string); @@ -31,7 +27,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $font = trim($font); if ($font === '') continue; // match a generic name - if (isset($this->generic_names[$font])) { + if (isset($generic_names[$font])) { $final .= $font . ', '; continue; } @@ -42,19 +38,24 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $quote = $font[0]; if ($font[$length - 1] !== $quote) continue; $font = substr($font, 1, $length - 2); + // double-backslash processing is buggy + $font = str_replace("\\$quote", $quote, $font); // de-escape quote + $font = str_replace("\\\n", "\n", $font); // de-escape newlines } - // process font + // $font is a pure representation of the font name + if (ctype_alnum($font)) { // very simple font, allow it in unharmed $final .= $font . 
', '; continue; } - $nospace = str_replace(array(' ', '.', '!'), '', $font); - if (ctype_alnum($nospace)) { - // font with spaces in it - $final .= "'$font', "; - continue; - } + + // complicated font, requires quoting + + // armor single quotes and new lines + $font = str_replace("'", "\\'", $font); + $font = str_replace("\n", "\\\n", $font); + $final .= "'$font', "; } $final = rtrim($final, ', '); if ($final === '') return false; @@ -63,4 +64,3 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php index 7da26a8f6b..095eaade3f 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php @@ -53,4 +53,3 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php index 2d2ed12da6..a89d679274 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php @@ -77,4 +77,3 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php index 0d1c840615..9a818d108a 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php @@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php index 48f1335ac8..4f22f82907 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php +++ 
b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php @@ -58,4 +58,3 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php index cc96f15d8c..4625bde6d7 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php @@ -40,4 +40,3 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php index 294dd83077..501ab2616f 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php @@ -10,23 +10,19 @@ require_once 'HTMLPurifier/AttrDef.php'; class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef { - /** - * Lookup table of allowed values. - * @protected - */ - var $allowed_values = array( - 'line-through' => true, - 'overline' => true, - 'underline' => true - ); - function validate($string, $config, &$context) { + static $allowed_values = array( + 'line-through' => true, + 'overline' => true, + 'underline' => true + ); + $string = strtolower($this->parseCDATA($string)); $parts = explode(' ', $string); $final = ''; foreach ($parts as $part) { - if (isset($this->allowed_values[$part])) { + if (isset($allowed_values[$part])) { $final .= $part . 
' '; } } @@ -38,4 +34,3 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php index b310907cd5..b71a858572 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php @@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI { function HTMLPurifier_AttrDef_CSS_URI() { - $this->HTMLPurifier_AttrDef_URI(true); // always embedded + parent::HTMLPurifier_AttrDef_URI(true); // always embedded } function validate($uri_string, $config, &$context) { @@ -29,7 +29,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI if ($uri_string[$new_length] != ')') return false; $uri = trim(substr($uri_string, 0, $new_length)); - if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) { + if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { $quote = $uri[0]; $new_length = strlen($uri) - 1; if ($uri[$new_length] !== $quote) return false; @@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php index 91a075f87a..011adda8bd 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php @@ -45,6 +45,21 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef return $result ? $string : false; } + /** + * @param $string In form of comma-delimited list of case-insensitive + * valid values. Example: "foo,bar,baz". 
Prepend "s:" to make + * case sensitive + */ + function make($string) { + if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { + $string = substr($string, 2); + $sensitive = true; + } else { + $sensitive = false; + } + $values = explode(',', $string); + return new HTMLPurifier_AttrDef_Enum($values, $sensitive); + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Bool.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Bool.php new file mode 100644 index 0000000000..ff6f0a8649 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Bool.php @@ -0,0 +1,29 @@ +name = $name;} + + function validate($string, $config, &$context) { + if (empty($string)) return false; + return $this->name; + } + + /** + * @param $string Name of attribute + */ + function make($string) { + return new HTMLPurifier_AttrDef_HTML_Bool($string); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Color.php new file mode 100644 index 0000000000..d6fa6749fb --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Color.php @@ -0,0 +1,34 @@ +get('Core', 'ColorKeywords'); + + $string = trim($string); + + if (empty($string)) return false; + if (isset($colors[$string])) return $colors[$string]; + if ($string[0] === '#') $hex = substr($string, 1); + else $hex = $string; + + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; + + return "#$hex"; + + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php index 5893bbfa0a..fdca8cb2ce 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -31,4 +31,3 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends 
HTMLPurifier_AttrDef_Enum } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php index c8bf29913c..641749cea0 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php @@ -118,4 +118,3 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php index ac83295a03..c4f98436ae 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php @@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php index 94a47ba92e..1122f0c8bd 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -26,22 +26,20 @@ HTMLPurifier_ConfigSchema::define( class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef { - /** Lookup array of attribute names to configuration name */ - var $configLookup = array( - 'rel' => 'AllowedRel', - 'rev' => 'AllowedRev' - ); - /** Name config attribute to pull. */ var $name; function HTMLPurifier_AttrDef_HTML_LinkTypes($name) { - if (!isset($this->configLookup[$name])) { + $configLookup = array( + 'rel' => 'AllowedRel', + 'rev' => 'AllowedRev' + ); + if (!isset($configLookup[$name])) { trigger_error('Unrecognized attribute name for link '. 
'relationship.', E_USER_ERROR); return; } - $this->name = $this->configLookup[$name]; + $this->name = $configLookup[$name]; } function validate($string, $config, &$context) { @@ -72,4 +70,3 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php index f50259b6fd..4c0c88c8ba 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php @@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Le } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php index 1eaeaa7e6a..6e58a80d18 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -48,4 +48,3 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php index 4c29091254..38bc7d684e 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php @@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php index d6953d6165..822ed67708 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php @@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php 
b/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php index 72d67f643c..4fbd9f67a2 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php @@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php index eb2a24a711..c06346649b 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php @@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php index 7102718136..dcf9849c45 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php @@ -1,89 +1,64 @@ + Munges all browsable (usually http, https and ftp) + absolute URI\'s into another URI, usually a URI redirection service. + This directive accepts a URI, formatted with a %s where + the url-encoded original URI should be inserted (sample: + http://www.google.com/url?q=%s). +

+

+ Uses for this directive: +

+ +

+ This directive has been available since 1.3.0. +

+'); -HTMLPurifier_ConfigSchema::define( - 'URI', 'Munge', null, 'string/null', - 'Munges all browsable (usually http, https and ftp) URI\'s into some URL '. - 'redirection service. Pass this directive a URI, with %s inserted where '. - 'the url-encoded original URI should be inserted (sample: '. - 'http://www.google.com/url?q=%s). '. - 'This prevents PageRank leaks, while being as transparent as possible '. - 'to users (you may also want to add some client side JavaScript to '. - 'override the text in the statusbar). Warning: many security experts '. - 'believe that this form of protection does not deter spam-bots. '. - 'You can also use this directive to redirect users to a splash page '. - 'telling them they are leaving your website. '. - 'This directive has been available since 1.3.0.' -); +// disabling directives HTMLPurifier_ConfigSchema::define( - 'URI', 'HostBlacklist', array(), 'list', - 'List of strings that are forbidden in the host of any URI. Use it to '. - 'kill domain names of spam, etc. Note that it will catch anything in '. - 'the domain, so moo.com will catch moo.com.example.com. '. - 'This directive has been available since 1.3.0.' -); + 'URI', 'Disable', false, 'bool', ' +

+ Disables all URIs in all forms. Not sure why you\'d want to do that + (after all, the Internet\'s founded on the notion of a hyperlink). + This directive has been available since 1.3.0. +

+'); +HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable'); HTMLPurifier_ConfigSchema::define( - 'URI', 'Disable', false, 'bool', - 'Disables all URIs in all forms. Not sure why you\'d want to do that '. - '(after all, the Internet\'s founded on the notion of a hyperlink). '. - 'This directive has been available since 1.3.0.' -); -HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable'); + 'URI', 'DisableResources', false, 'bool', ' +

+ Disables embedding resources, essentially meaning no pictures. You can + still link to them though. See %URI.DisableExternalResources for why + this might be a good idea. This directive has been available since 1.3.0. +

+'); /** * Validates a URI as defined by RFC 3986. @@ -92,205 +67,83 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable'); class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef { - var $host; - var $PercentEncoder; - var $embeds_resource; + var $parser, $percentEncoder; + var $embedsResource; /** * @param $embeds_resource_resource Does the URI here result in an extra HTTP request? */ function HTMLPurifier_AttrDef_URI($embeds_resource = false) { - $this->host = new HTMLPurifier_AttrDef_URI_Host(); - $this->PercentEncoder = new HTMLPurifier_PercentEncoder(); - $this->embeds_resource = (bool) $embeds_resource; + $this->parser = new HTMLPurifier_URIParser(); + $this->percentEncoder = new HTMLPurifier_PercentEncoder(); + $this->embedsResource = (bool) $embeds_resource; } function validate($uri, $config, &$context) { - // We'll write stack-based parsers later, for now, use regexps to - // get things working as fast as possible (irony) - if ($config->get('URI', 'Disable')) return false; - // parse as CDATA + // initial operations $uri = $this->parseCDATA($uri); + $uri = $this->percentEncoder->normalize($uri); - // fix up percent-encoding - $uri = $this->PercentEncoder->normalize($uri); - - // while it would be nice to use parse_url(), that's specifically - // for HTTP and thus won't work for our generic URI parsing + // parse the URI + $uri = $this->parser->parse($uri); + if ($uri === false) return false; - // according to the RFC... (but this cuts corners, i.e. non-validating) - $r_URI = '!'. - '(([^:/?#<>\'"]+):)?'. // 2. Scheme - '(//([^/?#<>\'"]*))?'. // 4. Authority - '([^?#<>\'"]*)'. // 5. Path - '(\?([^#<>\'"]*))?'. // 7. Query - '(#([^<>\'"]*))?'. // 8. 
Fragment - '!'; + // add embedded flag to context for validators + $context->register('EmbeddedURI', $this->embedsResource); - $matches = array(); - $result = preg_match($r_URI, $uri, $matches); - - if (!$result) return false; // invalid URI - - // seperate out parts - $scheme = !empty($matches[1]) ? $matches[2] : null; - $authority = !empty($matches[3]) ? $matches[4] : null; - $path = $matches[5]; // always present, can be empty - $query = !empty($matches[6]) ? $matches[7] : null; - $fragment = !empty($matches[8]) ? $matches[9] : null; - - - - $registry =& HTMLPurifier_URISchemeRegistry::instance(); - if ($scheme !== null) { - // no need to validate the scheme's fmt since we do that when we - // retrieve the specific scheme object from the registry - $scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme); - $scheme_obj = $registry->getScheme($scheme, $config, $context); - if (!$scheme_obj) return false; // invalid scheme, clean it out - } else { - $scheme_obj = $registry->getScheme( - $config->get('URI', 'DefaultScheme'), $config, $context - ); - } - - - // the URI we're processing embeds_resource a resource in the page, but the URI - // it references cannot be located - if ($this->embeds_resource && !$scheme_obj->browsable) { - return false; - } - - - if ($authority !== null) { + $ok = false; + do { - // remove URI if it's absolute and we disabled externals or - // if it's absolute and embedded and we disabled external resources - unset($our_host); - if ( - $config->get('URI', 'DisableExternal') || - ( - $config->get('URI', 'DisableExternalResources') && - $this->embeds_resource - ) - ) { - $our_host = $config->get('URI', 'Host'); - if ($our_host === null) return false; - } + // generic validation + $result = $uri->validate($config, $context); + if (!$result) break; - $HEXDIG = '[A-Fa-f0-9]'; - $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] - $sub_delims = '!$&\'()'; // needs [] - $pct_encoded = "%$HEXDIG$HEXDIG"; - $r_userinfo = 
"(?:[$unreserved$sub_delims:]|$pct_encoded)*"; - $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; - $matches = array(); - preg_match($r_authority, $authority, $matches); - // overloads regexp! - $userinfo = !empty($matches[1]) ? $matches[2] : null; - $host = !empty($matches[3]) ? $matches[3] : null; - $port = !empty($matches[4]) ? $matches[5] : null; + // chained validation + $uri_def =& $config->getDefinition('URI'); + $result = $uri_def->filter($uri, $config, $context); + if (!$result) break; - // validate port - if ($port !== null) { - $port = (int) $port; - if ($port < 1 || $port > 65535) $port = null; - } - - $host = $this->host->validate($host, $config, $context); - if ($host === false) $host = null; - - if ($this->checkBlacklist($host, $config, $context)) return false; + // scheme-specific validation + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) break; + if ($this->embedsResource && !$scheme_obj->browsable) break; + $result = $scheme_obj->validate($uri, $config, $context); + if (!$result) break; - // more lenient absolute checking - if (isset($our_host)) { - $host_parts = array_reverse(explode('.', $host)); - // could be cached - $our_host_parts = array_reverse(explode('.', $our_host)); - foreach ($our_host_parts as $i => $discard) { - if (!isset($host_parts[$i])) return false; - if ($host_parts[$i] != $our_host_parts[$i]) return false; - } - } - - // userinfo and host are validated within the regexp + // survived gauntlet + $ok = true; - } else { - $port = $host = $userinfo = null; - } - - - // query and fragment are quite simple in terms of definition: - // *( pchar / "/" / "?" ), so define their validation routines - // when we start fixing percent encoding - - - - // path gets to be validated against a hodge-podge of rules depending - // on the status of authority and scheme, but it's not that important, - // esp. 
since it won't be applicable to everyone - + } while (false); + $context->destroy('EmbeddedURI'); + if (!$ok) return false; - // okay, now we defer execution to the subobject for more processing - // note that $fragment is omitted - list($userinfo, $host, $port, $path, $query) = - $scheme_obj->validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context - ); - - - // reconstruct authority - $authority = null; - if (!is_null($userinfo) || !is_null($host) || !is_null($port)) { - $authority = ''; - if($userinfo !== null) $authority .= $userinfo . '@'; - $authority .= $host; - if($port !== null) $authority .= ':' . $port; + // munge scheme off if necessary (this must be last) + if (!is_null($uri->scheme) && is_null($uri->host)) { + if ($uri_def->defaultScheme == $uri->scheme) { + $uri->scheme = null; + } } - // reconstruct the result - $result = ''; - if ($scheme !== null) $result .= "$scheme:"; - if ($authority !== null) $result .= "//$authority"; - $result .= $path; - if ($query !== null) $result .= "?$query"; - if ($fragment !== null) $result .= "#$fragment"; + // back to string + $result = $uri->toString(); - // munge if necessary - $munge = $config->get('URI', 'Munge'); - if (!empty($scheme_obj->browsable) && $munge !== null) { - if ($authority !== null) { - $result = str_replace('%s', rawurlencode($result), $munge); - } + // munge entire URI if necessary + if ( + !is_null($uri->host) && // indicator for authority + !empty($scheme_obj->browsable) && + !is_null($munge = $config->get('URI', 'Munge')) + ) { + $result = str_replace('%s', rawurlencode($result), $munge); } return $result; } - /** - * Checks a host against an array blacklist - * @param $host Host to check - * @param $config HTMLPurifier_Config instance - * @param $context HTMLPurifier_Context instance - * @return bool Is spam? 
- */ - function checkBlacklist($host, &$config, &$context) { - $blacklist = $config->get('URI', 'HostBlacklist'); - if (!empty($blacklist)) { - foreach($blacklist as $blacklisted_host_fragment) { - if (strpos($host, $blacklisted_host_fragment) !== false) { - return true; - } - } - } - return false; - } - } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php index 80b8d367e1..5a7085db7a 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php @@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php index e35b1b4b28..6623f1907f 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php @@ -20,4 +20,3 @@ class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_UR } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php index 5344cdac25..ac729ebd93 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php @@ -51,4 +51,3 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php index 0730bbc8ac..9a1af293ba 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php @@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef */ var $ip4; - function HTMLPurifier_AttrDef_URI_IPv4() { - $oct = 
'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 - $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; - } - function validate($aIP, $config, &$context) { + if (!$this->ip4) $this->_loadRegex(); + if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) { return $aIP; @@ -31,6 +28,14 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef } + /** + * Lazy load function to prevent regex from being stuffed in + * cache. + */ + function _loadRegex() { + $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 + $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php index 73f085e55e..f48b803dd7 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php @@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 function validate($aIP, $config, &$context) { + if (!$this->ip4) $this->_loadRegex(); + $original = $aIP; $hex = '[0-9a-fA-F]'; @@ -96,4 +98,3 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php index 2fa07b4755..ce69fcbe82 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php @@ -55,4 +55,3 @@ class HTMLPurifier_AttrTransform } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php index 0ea5eb6dc2..f127feb2b2 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php @@ -28,4 +28,3 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform } -?> \ No newline at end of 
file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php index a7bb2b4564..de2867efdd 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php @@ -23,4 +23,3 @@ extends HTMLPurifier_AttrTransform { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php index f4a16a7f17..25548eea7c 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php @@ -36,4 +36,3 @@ extends HTMLPurifier_AttrTransform { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php index 10c62e3c5b..7da4f6a804 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php @@ -17,4 +17,3 @@ class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php index ed4dfc32dd..0470413dd4 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php @@ -57,4 +57,3 @@ class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php index 4ff356d889..d042805538 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php @@ -20,7 +20,10 @@ HTMLPurifier_ConfigSchema::define( ); /** - * Post-transform that ensures 
the required attrs of img (alt and src) are set + * Transform that supplies default values for the src and alt attributes + * in img tags, as well as prevents the img tag from being removed + * because of a missing alt tag. This needs to be registered as both + * a pre and post attribute transform. */ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform { @@ -29,6 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform $src = true; if (!isset($attr['src'])) { + if ($config->get('Core', 'RemoveInvalidImg')) return $attr; $attr['src'] = $config->get('Attr', 'DefaultInvalidImage'); $src = false; } @@ -47,4 +51,3 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php index 53c787e2c9..60d5edc781 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php @@ -44,4 +44,3 @@ extends HTMLPurifier_AttrTransform { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php index acb1786ae9..899f5c8dc5 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php @@ -27,4 +27,3 @@ class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php index 2292aa133e..a8904c5e44 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php @@ -26,4 +26,3 @@ class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform } -?> \ No newline at end of file diff --git 
a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php index f14c147989..248d0e02fe 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php @@ -18,4 +18,3 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php deleted file mode 100644 index 09088fe176..0000000000 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php +++ /dev/null @@ -1,36 +0,0 @@ - 1, - 'right' => 1, - 'center' => 1, - 'justify' => 1); - - if (!isset($values[$align])) { - return $attr; - } - - $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; - $attr['style'] = "text-align:$align;" . $attr['style']; - - return $attr; - - } - -} - -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTypes.php b/lib/htmlpurifier/HTMLPurifier/AttrTypes.php index e13d0d3005..4cb70be7ad 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTypes.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTypes.php @@ -1,10 +1,14 @@ info['Enum'] = new HTMLPurifier_AttrDef_Enum(); + $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); + $this->info['CDATA'] = new HTMLPurifier_AttrDef_Text(); $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID(); $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length(); @@ -32,10 +41,42 @@ class HTMLPurifier_AttrTypes $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels(); $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); + $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); + $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); // number is really a positive integer (one or more digits) + // FIXME: ^^ not always, see start and value of list items 
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); } + + /** + * Retrieves a type + * @param $type String type name + * @return Object AttrDef for type + */ + function get($type) { + + // determine if there is any extra info tacked on + if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2); + else $string = ''; + + if (!isset($this->info[$type])) { + trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR); + return; + } + + return $this->info[$type]->make($string); + + } + + /** + * Sets a new implementation for a type + * @param $type String type name + * @param $impl Object AttrDef for type + */ + function set($type, $impl) { + $this->info[$type] = $impl; + } } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/AttrValidator.php b/lib/htmlpurifier/HTMLPurifier/AttrValidator.php new file mode 100644 index 0000000000..f02bd2087c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrValidator.php @@ -0,0 +1,139 @@ +getHTMLDefinition(); + $e =& $context->get('ErrorCollector', true); + + // initialize CurrentToken if necessary + $current_token =& $context->get('CurrentToken', true); + if (!$current_token) $context->register('CurrentToken', $token); + + if ($token->type !== 'start' && $token->type !== 'empty') return $token; + + // create alias to global definition array, see also $defs + // DEFINITION CALL + $d_defs = $definition->info_global_attr; + + // reference attributes for easy manipulation + $attr =& $token->attr; + + // do global transformations (pre) + // nothing currently utilizes this + foreach ($definition->info_attr_transform_pre as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + + // do local transformations only applicable to this element (pre) + // ex.

to

+ foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + + // create alias to this element's attribute definition array, see + // also $d_defs (global attribute definition array) + // DEFINITION CALL + $defs = $definition->info[$token->name]->attr; + + $attr_key = false; + $context->register('CurrentAttr', $attr_key); + + // iterate through all the attribute keypairs + // Watch out for name collisions: $key has previously been used + foreach ($attr as $attr_key => $value) { + + // call the definition + if ( isset($defs[$attr_key]) ) { + // there is a local definition defined + if ($defs[$attr_key] === false) { + // We've explicitly been told not to allow this element. + // This is usually when there's a global definition + // that must be overridden. + // Theoretically speaking, we could have a + // AttrDef_DenyAll, but this is faster! + $result = false; + } else { + // validate according to the element's definition + $result = $defs[$attr_key]->validate( + $value, $config, $context + ); + } + } elseif ( isset($d_defs[$attr_key]) ) { + // there is a global definition defined, validate according + // to the global definition + $result = $d_defs[$attr_key]->validate( + $value, $config, $context + ); + } else { + // system never heard of the attribute? DELETE! + $result = false; + } + + // put the results into effect + if ($result === false || $result === null) { + // this is a generic error message that should replaced + // with more specific ones when possible + if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed'); + + // remove the attribute + unset($attr[$attr_key]); + } elseif (is_string($result)) { + // generally, if a substitution is happening, there + // was some sort of implicit correction going on. 
We'll + // delegate it to the attribute classes to say exactly what. + + // simple substitution + $attr[$attr_key] = $result; + } + + // we'd also want slightly more complicated substitution + // involving an array as the return value, + // although we're not sure how colliding attributes would + // resolve (certain ones would be completely overriden, + // others would prepend themselves). + } + + $context->destroy('CurrentAttr'); + + // post transforms + + // global (error reporting untested) + foreach ($definition->info_attr_transform_post as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + + // local (error reporting untested) + foreach ($definition->info[$token->name]->attr_transform_post as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + + // destroy CurrentToken if we made it ourselves + if (!$current_token) $context->destroy('CurrentToken'); + + } + + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php index 23a66ab76a..8de2aa7b70 100644 --- a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php @@ -1,5 +1,7 @@ + Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. This directive has been available + since 2.0.0. +

+'); + /** * Defines allowed CSS attributes and what their values are. * @see HTMLPurifier_HTMLDefinition */ -class HTMLPurifier_CSSDefinition +class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition { + var $type = 'CSS'; + /** * Assoc array of attribute name to definition object. */ @@ -30,7 +43,7 @@ class HTMLPurifier_CSSDefinition /** * Constructs the info array. The meat of this class. */ - function setup($config) { + function doSetup($config) { $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum( array('left', 'right', 'center', 'justify'), false); @@ -213,4 +226,3 @@ class HTMLPurifier_CSSDefinition } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef.php b/lib/htmlpurifier/HTMLPurifier/ChildDef.php index bed43cacd3..5236d266c5 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef.php @@ -36,6 +36,11 @@ class HTMLPurifier_ChildDef */ var $allow_empty; + /** + * Lookup array of all elements that this definition could possibly allow + */ + var $elements = array(); + /** * Validates nodes according to definition and returns modification. 
* @@ -52,4 +57,4 @@ class HTMLPurifier_ChildDef } } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php index afe0299fa7..b338354d38 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php @@ -35,6 +35,7 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef function HTMLPurifier_ChildDef_Chameleon($inline, $block) { $this->inline = new HTMLPurifier_ChildDef_Optional($inline); $this->block = new HTMLPurifier_ChildDef_Optional($block); + $this->elements = $this->block->elements; } function validateChildren($tokens_of_children, $config, &$context) { @@ -48,4 +49,3 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php index de18cd7070..ba722d0595 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php @@ -38,8 +38,27 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef if ($raw{0} != '(') { $raw = "($raw)"; } - $reg = str_replace(',', ',?', $raw); - $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); + $el = '[#a-zA-Z0-9_.-]+'; + $reg = $raw; + + // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M + // DOING! Seriously: if there's problems, please report them. 
+ + // collect all elements into the $elements array + preg_match_all("/$el/", $reg, $matches); + foreach ($matches[0] as $match) { + $this->elements[$match] = true; + } + + // setup all elements as parentheticals with leading commas + $reg = preg_replace("/$el/", '(,\\0)', $reg); + + // remove commas when they were not solicited + $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); + + // remove all non-paranthetical commas: they are handled by first regex + $reg = preg_replace("/,\(/", '(', $reg); + $this->_pcre_regex = $reg; } function validateChildren($tokens_of_children, $config, &$context) { @@ -60,11 +79,11 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef $list_of_children .= $token->name . ','; } } - $list_of_children = rtrim($list_of_children, ','); - + // add leading comma to deal with stray comma declarations + $list_of_children = ',' . rtrim($list_of_children, ','); $okay = preg_match( - '/^'.$this->_pcre_regex.'$/', + '/^,?'.$this->_pcre_regex.'$/', $list_of_children ); @@ -72,4 +91,3 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php index 1ab4fdd657..6e63730770 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php @@ -19,4 +19,3 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php index cc8883263e..779a7f06b9 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php @@ -20,4 +20,3 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php 
b/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php index c6f706e29a..f4d908b05d 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php @@ -25,11 +25,10 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $elements = array_flip($elements); foreach ($elements as $i => $x) { $elements[$i] = true; - if (empty($i)) unset($elements[$i]); + if (empty($i)) unset($elements[$i]); // remove blank } } $this->elements = $elements; - $this->gen = new HTMLPurifier_Generator(); } var $allow_empty = false; var $type = 'required'; @@ -57,6 +56,12 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef // some configuration $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); + // generator + static $gen = null; + if ($gen === null) { + $gen = new HTMLPurifier_Generator(); + } + foreach ($tokens_of_children as $token) { if (!empty($token->is_whitespace)) { $result[] = $token; @@ -80,7 +85,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $result[] = $token; } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = new HTMLPurifier_Token_Text( - $this->gen->generateFromToken($token, $config) + $gen->generateFromToken($token, $config) ); } continue; @@ -91,7 +96,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = new HTMLPurifier_Token_Text( - $this->gen->generateFromToken( $token, $config ) + $gen->generateFromToken( $token, $config ) ); } else { // drop silently @@ -104,4 +109,3 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php index 9280a9f50a..60dcbc4a15 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php +++ 
b/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -45,8 +45,8 @@ extends HTMLPurifier_ChildDef_Required if (!$is_inline) { if (!$depth) { if ( - $token->type == 'text' || - !isset($this->elements[$token->name]) + ($token->type == 'text' && !$token->is_whitespace) || + ($token->type != 'text' && !isset($this->elements[$token->name])) ) { $is_inline = true; $ret[] = $block_wrap_start; @@ -73,4 +73,3 @@ extends HTMLPurifier_ChildDef_Required } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php index 3534cdd0a6..ca3c83cc0e 100644 --- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php @@ -9,6 +9,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef { var $allow_empty = false; var $type = 'table'; + var $elements = array('tr' => true, 'tbody' => true, 'thead' => true, + 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); function HTMLPurifier_ChildDef_Table() {} function validateChildren($tokens_of_children, $config, &$context) { if (empty($tokens_of_children)) return false; @@ -139,4 +141,3 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php index c94e01f636..b9b77178e0 100644 --- a/lib/htmlpurifier/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/HTMLPurifier/Config.php @@ -1,5 +1,29 @@ +if (!defined('PHP_EOL')) { + switch (strtoupper(substr(PHP_OS, 0, 3))) { + case 'WIN': + define('PHP_EOL', "\r\n"); + break; + case 'DAR': + define('PHP_EOL', "\r"); + break; + default: + define('PHP_EOL', "\n"); + } +} + /** * Configuration object that triggers customizable behavior. 
* @@ -15,6 +39,11 @@ class HTMLPurifier_Config { + /** + * HTML Purifier's version + */ + var $version = '2.1.1'; + /** * Two-level associative array of configuration directives */ @@ -26,14 +55,31 @@ class HTMLPurifier_Config var $def; /** - * Cached instance of HTMLPurifier_HTMLDefinition + * Indexed array of definitions + */ + var $definitions; + + /** + * Bool indicator whether or not config is finalized */ - var $html_definition; + var $finalized = false; /** - * Cached instance of HTMLPurifier_CSSDefinition + * Bool indicator whether or not to automatically finalize + * the object if a read operation is done */ - var $css_definition; + var $autoFinalize = true; + + /** + * Namespace indexed array of serials for specific namespaces (see + * getSerial for more info). + */ + var $serials = array(); + + /** + * Serial for entire configuration object + */ + var $serial; /** * @param $definition HTMLPurifier_ConfigSchema that defines what directives @@ -54,7 +100,10 @@ class HTMLPurifier_Config * @return Configured HTMLPurifier_Config object */ function create($config) { - if (is_a($config, 'HTMLPurifier_Config')) return $config; + if (is_a($config, 'HTMLPurifier_Config')) { + // pass-through + return $config; + } $ret = HTMLPurifier_Config::createDefault(); if (is_string($config)) $ret->loadIni($config); elseif (is_array($config)) $ret->loadArray($config); @@ -78,13 +127,16 @@ class HTMLPurifier_Config * @param $key String key */ function get($namespace, $key, $from_alias = false) { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); if (!isset($this->def->info[$namespace][$key])) { - trigger_error('Cannot retrieve value of undefined directive', + // can't add % due to SimpleTest bug + trigger_error('Cannot retrieve value of undefined directive ' . 
htmlspecialchars("$namespace.$key"), E_USER_WARNING); return; } if ($this->def->info[$namespace][$key]->class == 'alias') { - trigger_error('Cannot get value from aliased directive, use real name', + $d = $this->def->info[$namespace][$key]; + trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name, E_USER_ERROR); return; } @@ -96,14 +148,50 @@ class HTMLPurifier_Config * @param $namespace String namespace */ function getBatch($namespace) { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); if (!isset($this->def->info[$namespace])) { - trigger_error('Cannot retrieve undefined namespace', + trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), E_USER_WARNING); return; } return $this->conf[$namespace]; } + /** + * Returns a md5 signature of a segment of the configuration object + * that uniquely identifies that particular configuration + * @note Revision is handled specially and is removed from the batch + * before processing! + * @param $namespace Namespace to get serial for + */ + function getBatchSerial($namespace) { + if (empty($this->serials[$namespace])) { + $batch = $this->getBatch($namespace); + unset($batch['DefinitionRev']); + $this->serials[$namespace] = md5(serialize($batch)); + } + return $this->serials[$namespace]; + } + + /** + * Returns a md5 signature for the entire configuration object + * that uniquely identifies that particular configuration + */ + function getSerial() { + if (empty($this->serial)) { + $this->serial = md5(serialize($this->getAll())); + } + return $this->serial; + } + + /** + * Retrieves all directives, organized by namespace + */ + function getAll() { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); + return $this->conf; + } + /** * Sets a value to configuration. 
* @param $namespace String namespace @@ -111,15 +199,16 @@ class HTMLPurifier_Config * @param $value Mixed value */ function set($namespace, $key, $value, $from_alias = false) { + if ($this->isFinalized('Cannot set directive after finalization')) return; if (!isset($this->def->info[$namespace][$key])) { - trigger_error('Cannot set undefined directive to value', + trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value', E_USER_WARNING); return; } if ($this->def->info[$namespace][$key]->class == 'alias') { if ($from_alias) { trigger_error('Double-aliases not allowed, please fix '. - 'ConfigSchema bug'); + 'ConfigSchema bug with' . "$namespace.$key"); } $this->set($this->def->info[$namespace][$key]->namespace, $this->def->info[$namespace][$key]->name, @@ -128,7 +217,7 @@ class HTMLPurifier_Config } $value = $this->def->validate( $value, - $this->def->info[$namespace][$key]->type, + $type = $this->def->info[$namespace][$key]->type, $this->def->info[$namespace][$key]->allow_null ); if (is_string($value)) { @@ -139,23 +228,36 @@ class HTMLPurifier_Config if ($this->def->info[$namespace][$key]->allowed !== true) { // check to see if the value is allowed if (!isset($this->def->info[$namespace][$key]->allowed[$value])) { - trigger_error('Value not supported', E_USER_WARNING); + trigger_error('Value not supported, valid values are: ' . + $this->_listify($this->def->info[$namespace][$key]->allowed), E_USER_WARNING); return; } } } if ($this->def->isError($value)) { - trigger_error('Value is of invalid type', E_USER_WARNING); + trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . 
$type, E_USER_WARNING); return; } $this->conf[$namespace][$key] = $value; - if ($namespace == 'HTML' || $namespace == 'Attr') { - // reset HTML definition if relevant attributes changed - $this->html_definition = null; - } - if ($namespace == 'CSS') { - $this->css_definition = null; + + // reset definitions if the directives they depend on changed + // this is a very costly process, so it's discouraged + // with finalization + if ($namespace == 'HTML' || $namespace == 'CSS') { + $this->definitions[$namespace] = null; } + + $this->serials[$namespace] = false; + } + + /** + * Convenience function for error reporting + * @private + */ + function _listify($lookup) { + $list = array(); + foreach ($lookup as $name => $b) $list[] = $name; + return implode(', ', $list); } /** @@ -164,26 +266,76 @@ class HTMLPurifier_Config * called before it's been setup, otherwise won't work. */ function &getHTMLDefinition($raw = false) { - if ( - empty($this->html_definition) || // hasn't ever been setup - ($raw && $this->html_definition->setup) // requesting new one - ) { - $this->html_definition = new HTMLPurifier_HTMLDefinition($this); - if ($raw) return $this->html_definition; // no setup! 
- } - if (!$this->html_definition->setup) $this->html_definition->setup(); - return $this->html_definition; + $def =& $this->getDefinition('HTML', $raw); + return $def; // prevent PHP 4.4.0 from complaining } /** * Retrieves reference to the CSS definition */ - function &getCSSDefinition() { - if ($this->css_definition === null) { - $this->css_definition = new HTMLPurifier_CSSDefinition(); - $this->css_definition->setup($this); + function &getCSSDefinition($raw = false) { + $def =& $this->getDefinition('CSS', $raw); + return $def; + } + + /** + * Retrieves a definition + * @param $type Type of definition: HTML, CSS, etc + * @param $raw Whether or not definition should be returned raw + */ + function &getDefinition($type, $raw = false) { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); + $factory = HTMLPurifier_DefinitionCacheFactory::instance(); + $cache = $factory->create($type, $this); + if (!$raw) { + // see if we can quickly supply a definition + if (!empty($this->definitions[$type])) { + if (!$this->definitions[$type]->setup) { + $this->definitions[$type]->setup($this); + $cache->set($this->definitions[$type], $this); + } + return $this->definitions[$type]; + } + // memory check missed, try cache + $this->definitions[$type] = $cache->get($this); + if ($this->definitions[$type]) { + // definition in cache, return it + return $this->definitions[$type]; + } + } elseif ( + !empty($this->definitions[$type]) && + !$this->definitions[$type]->setup + ) { + // raw requested, raw in memory, quick return + return $this->definitions[$type]; + } + // quick checks failed, let's create the object + if ($type == 'HTML') { + $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); + } elseif ($type == 'CSS') { + $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + } elseif ($type == 'URI') { + $this->definitions[$type] = new HTMLPurifier_URIDefinition(); + } else { + trigger_error("Definition of $type type not supported"); + $false = false; + 
return $false; + } + // quick abort if raw + if ($raw) { + if (is_null($this->get($type, 'DefinitionID'))) { + // fatally error out if definition ID not set + trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR); + $false = new HTMLPurifier_Error(); + return $false; + } + return $this->definitions[$type]; } - return $this->css_definition; + // set it up + $this->definitions[$type]->setup($this); + // save in cache + $cache->set($this->definitions[$type], $this); + return $this->definitions[$type]; } /** @@ -192,6 +344,7 @@ class HTMLPurifier_Config * @param $config_array Configuration associative array */ function loadArray($config_array) { + if ($this->isFinalized('Cannot load directives after finalization')) return; foreach ($config_array as $key => $value) { $key = str_replace('_', '.', $key); if (strpos($key, '.') !== false) { @@ -208,15 +361,134 @@ class HTMLPurifier_Config } } + /** + * Returns a list of array(namespace, directive) for all directives + * that are allowed in a web-form context as per an allowed + * namespaces/directives list. 
+ * @param $allowed List of allowed namespaces/directives + * @static + */ + function getAllowedDirectivesForForm($allowed) { + $schema = HTMLPurifier_ConfigSchema::instance(); + if ($allowed !== true) { + if (is_string($allowed)) $allowed = array($allowed); + $allowed_ns = array(); + $allowed_directives = array(); + $blacklisted_directives = array(); + foreach ($allowed as $ns_or_directive) { + if (strpos($ns_or_directive, '.') !== false) { + // directive + if ($ns_or_directive[0] == '-') { + $blacklisted_directives[substr($ns_or_directive, 1)] = true; + } else { + $allowed_directives[$ns_or_directive] = true; + } + } else { + // namespace + $allowed_ns[$ns_or_directive] = true; + } + } + } + $ret = array(); + foreach ($schema->info as $ns => $keypairs) { + foreach ($keypairs as $directive => $def) { + if ($allowed !== true) { + if (isset($blacklisted_directives["$ns.$directive"])) continue; + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; + } + if ($def->class == 'alias') continue; + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; + $ret[] = array($ns, $directive); + } + } + return $ret; + } + + /** + * Loads configuration values from $_GET/$_POST that were posted + * via ConfigForm + * @param $array $_GET or $_POST array to import + * @param $index Index/name that the config variables are in + * @param $allowed List of allowed namespaces/directives + * @param $mq_fix Boolean whether or not to enable magic quotes fix + * @static + */ + function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix); + $config = HTMLPurifier_Config::create($ret); + return $config; + } + + /** + * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. 
+ * @note Same parameters as loadArrayFromForm + */ + function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix); + $this->loadArray($ret); + } + + /** + * Prepares an array from a form into something usable for the more + * strict parts of HTMLPurifier_Config + * @static + */ + function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) { + $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); + $mq = get_magic_quotes_gpc() && $mq_fix; + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed); + $ret = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $skey = "$ns.$directive"; + if (!empty($array["Null_$skey"])) { + $ret[$ns][$directive] = null; + continue; + } + if (!isset($array[$skey])) continue; + $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; + $ret[$ns][$directive] = $value; + } + return $ret; + } + /** * Loads configuration values from an ini file * @param $filename Name of ini file */ function loadIni($filename) { + if ($this->isFinalized('Cannot load directives after finalization')) return; $array = parse_ini_file($filename, true); $this->loadArray($array); } + /** + * Checks whether or not the configuration object is finalized. 
+ * @param $error String error message, or false for no error + */ + function isFinalized($error = false) { + if ($this->finalized && $error) { + trigger_error($error, E_USER_ERROR); + } + return $this->finalized; + } + + /** + * Finalizes configuration only if auto finalize is on and not + * already finalized + */ + function autoFinalize() { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); + } + + /** + * Finalizes a configuration object, prohibiting further change + */ + function finalize() { + $this->finalized = true; + } + } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef.php index b92640dc61..21825e01b8 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef.php @@ -7,4 +7,3 @@ class HTMLPurifier_ConfigDef { var $class = false; } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php index 39026540b3..21c33fae8d 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php @@ -61,6 +61,12 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef */ var $aliases = array(); + /** + * Advisory list of directive aliases, i.e. 
other directives that + * redirect here + */ + var $directiveAliases = array(); + /** * Adds a description to the array */ @@ -71,4 +77,3 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php index 81a4451413..6637802621 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php @@ -24,4 +24,3 @@ class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php index f53892b47e..21d732114f 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php @@ -20,4 +20,3 @@ class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php index 940e8e6199..d6700e6ec1 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php @@ -6,8 +6,11 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php'; require_once 'HTMLPurifier/ConfigDef/Directive.php'; require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php'; +if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false); + /** * Configuration definition, defines directives and their defaults. 
+ * @note If you update this, please update Printer_ConfigForm * @todo The ability to define things multiple times is confusing and should * be factored out to its own function named registerDependency() or * addNote(), where only the namespace.name and an extra descriptions @@ -48,6 +51,8 @@ class HTMLPurifier_ConfigSchema { var $types = array( 'string' => 'String', 'istring' => 'Case-insensitive string', + 'text' => 'Text', + 'itext' => 'Case-insensitive text', 'int' => 'Integer', 'float' => 'Float', 'bool' => 'Boolean', @@ -66,6 +71,10 @@ class HTMLPurifier_ConfigSchema { $this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.'); $this->defineNamespace('HTML', 'Configuration regarding allowed HTML.'); $this->defineNamespace('CSS', 'Configuration regarding allowed CSS.'); + $this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as Injectors)'); + $this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality'); + $this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.'); + $this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.'); $this->defineNamespace('Test', 'Developer testing configuration for our unit tests.'); } @@ -95,27 +104,30 @@ class HTMLPurifier_ConfigSchema { * HTMLPurifier_DirectiveDef::$type for allowed values * @param $description Description of directive for documentation */ - function define( - $namespace, $name, $default, $type, - $description - ) { + function define($namespace, $name, $default, $type, $description) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace])) { - trigger_error('Cannot define directive for undefined namespace', - E_USER_ERROR); - return; - } - if (!ctype_alnum($name)) { - trigger_error('Directive name must be alphanumeric', - E_USER_ERROR); - return; - } - if (empty($description)) { - 
trigger_error('Description must be non-empty', - E_USER_ERROR); - return; + + // basic sanity checks + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive for undefined namespace', + E_USER_ERROR); + return; + } + if (!ctype_alnum($name)) { + trigger_error('Directive name must be alphanumeric', + E_USER_ERROR); + return; + } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } } + if (isset($def->info[$namespace][$name])) { + // already defined if ( $def->info[$namespace][$name]->type !== $type || $def->defaults[$namespace][$name] !== $default @@ -124,29 +136,35 @@ class HTMLPurifier_ConfigSchema { return; } } else { - // process modifiers + // needs defining + + // process modifiers (OPTIMIZE!) $type_values = explode('/', $type, 2); $type = $type_values[0]; $modifier = isset($type_values[1]) ? $type_values[1] : false; $allow_null = ($modifier === 'null'); - if (!isset($def->types[$type])) { - trigger_error('Invalid type for configuration directive', - E_USER_ERROR); - return; - } - $default = $def->validate($default, $type, $allow_null); - if ($def->isError($default)) { - trigger_error('Default value does not match directive type', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->types[$type])) { + trigger_error('Invalid type for configuration directive', + E_USER_ERROR); + return; + } + $default = $def->validate($default, $type, $allow_null); + if ($def->isError($default)) { + trigger_error('Default value does not match directive type', + E_USER_ERROR); + return; + } } + $def->info[$namespace][$name] = new HTMLPurifier_ConfigDef_Directive(); $def->info[$namespace][$name]->type = $type; $def->info[$namespace][$name]->allow_null = $allow_null; $def->defaults[$namespace][$name] = $default; } + if (!HTMLPURIFIER_SCHEMA_STRICT) return; $backtrace = debug_backtrace(); $file = $def->mungeFilename($backtrace[0]['file']); 
$line = $backtrace[0]['line']; @@ -161,19 +179,21 @@ class HTMLPurifier_ConfigSchema { */ function defineNamespace($namespace, $description) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (isset($def->info[$namespace])) { - trigger_error('Cannot redefine namespace', E_USER_ERROR); - return; - } - if (!ctype_alnum($namespace)) { - trigger_error('Namespace name must be alphanumeric', - E_USER_ERROR); - return; - } - if (empty($description)) { - trigger_error('Description must be non-empty', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (isset($def->info[$namespace])) { + trigger_error('Cannot redefine namespace', E_USER_ERROR); + return; + } + if (!ctype_alnum($namespace)) { + trigger_error('Namespace name must be alphanumeric', + E_USER_ERROR); + return; + } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } } $def->info[$namespace] = array(); $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace(); @@ -194,23 +214,25 @@ class HTMLPurifier_ConfigSchema { */ function defineValueAliases($namespace, $name, $aliases) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace][$name])) { + if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { trigger_error('Cannot set value alias for non-existant directive', E_USER_ERROR); return; } foreach ($aliases as $alias => $real) { - if (!$def->info[$namespace][$name] !== true && - !isset($def->info[$namespace][$name]->allowed[$real]) - ) { - trigger_error('Cannot define alias to value that is not allowed', - E_USER_ERROR); - return; - } - if (isset($def->info[$namespace][$name]->allowed[$alias])) { - trigger_error('Cannot define alias over allowed value', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!$def->info[$namespace][$name] !== true && + !isset($def->info[$namespace][$name]->allowed[$real]) + ) { + trigger_error('Cannot define alias to value that is 
not allowed', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name]->allowed[$alias])) { + trigger_error('Cannot define alias over allowed value', + E_USER_ERROR); + return; + } } $def->info[$namespace][$name]->aliases[$alias] = $real; } @@ -225,14 +247,14 @@ class HTMLPurifier_ConfigSchema { */ function defineAllowedValues($namespace, $name, $allowed_values) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace][$name])) { + if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { trigger_error('Cannot define allowed values for undefined directive', E_USER_ERROR); return; } $directive =& $def->info[$namespace][$name]; $type = $directive->type; - if ($type != 'string' && $type != 'istring') { + if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') { trigger_error('Cannot define allowed values for directive whose type is not string', E_USER_ERROR); return; @@ -243,8 +265,11 @@ class HTMLPurifier_ConfigSchema { foreach ($allowed_values as $value) { $directive->allowed[$value] = true; } - if ($def->defaults[$namespace][$name] !== null && - !isset($directive->allowed[$def->defaults[$namespace][$name]])) { + if ( + HTMLPURIFIER_SCHEMA_STRICT && + $def->defaults[$namespace][$name] !== null && + !isset($directive->allowed[$def->defaults[$namespace][$name]]) + ) { trigger_error('Default value must be in allowed range of variables', E_USER_ERROR); $directive->allowed = true; // undo undo! 
@@ -262,34 +287,37 @@ class HTMLPurifier_ConfigSchema { */ function defineAlias($namespace, $name, $new_namespace, $new_name) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace])) { - trigger_error('Cannot define directive alias in undefined namespace', - E_USER_ERROR); - return; - } - if (!ctype_alnum($name)) { - trigger_error('Directive name must be alphanumeric', - E_USER_ERROR); - return; - } - if (isset($def->info[$namespace][$name])) { - trigger_error('Cannot define alias over directive', - E_USER_ERROR); - return; - } - if (!isset($def->info[$new_namespace][$new_name])) { - trigger_error('Cannot define alias to undefined directive', - E_USER_ERROR); - return; - } - if ($def->info[$new_namespace][$new_name]->class == 'alias') { - trigger_error('Cannot define alias to alias', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive alias in undefined namespace', + E_USER_ERROR); + return; + } + if (!ctype_alnum($name)) { + trigger_error('Directive name must be alphanumeric', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name])) { + trigger_error('Cannot define alias over directive', + E_USER_ERROR); + return; + } + if (!isset($def->info[$new_namespace][$new_name])) { + trigger_error('Cannot define alias to undefined directive', + E_USER_ERROR); + return; + } + if ($def->info[$new_namespace][$new_name]->class == 'alias') { + trigger_error('Cannot define alias to alias', + E_USER_ERROR); + return; + } } $def->info[$namespace][$name] = new HTMLPurifier_ConfigDef_DirectiveAlias( $new_namespace, $new_name); + $def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name"; } /** @@ -303,11 +331,14 @@ class HTMLPurifier_ConfigSchema { if ($allow_null && $var === null) return null; switch ($type) { case 'mixed': + //if (is_string($var)) $var = unserialize($var); return $var; case 'istring': case 'string': + case 
'text': // no difference, just is longer/multiple line string + case 'itext': if (!is_string($var)) break; - if ($type === 'istring') $var = strtolower($var); + if ($type === 'istring' || $type === 'itext') $var = strtolower($var); return $var; case 'int': if (is_string($var) && ctype_digit($var)) $var = (int) $var; @@ -338,11 +369,25 @@ class HTMLPurifier_ConfigSchema { // a single empty string item, but having an empty // array is more intuitive if ($var == '') return array(); - // simplistic string to array method that only works - // for simple lists of tag names or alphanumeric characters - $var = explode(',',$var); + if (strpos($var, "\n") === false && strpos($var, "\r") === false) { + // simplistic string to array method that only works + // for simple lists of tag names or alphanumeric characters + $var = explode(',',$var); + } else { + $var = preg_split('/(,|[\n\r]+)/', $var); + } // remove spaces foreach ($var as $i => $j) $var[$i] = trim($j); + if ($type === 'hash') { + // key:value,key2:value2 + $nvar = array(); + foreach ($var as $keypair) { + $c = explode(':', $keypair, 2); + if (!isset($c[1])) continue; + $nvar[$c[0]] = $c[1]; + } + $var = $nvar; + } } if (!is_array($var)) break; $keys = array_keys($var); @@ -371,6 +416,7 @@ class HTMLPurifier_ConfigSchema { * Takes an absolute path and munges it into a more manageable relative path */ function mungeFilename($filename) { + if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename; $offset = strrpos($filename, 'HTMLPurifier'); $filename = substr($filename, $offset); $filename = str_replace('\\', '/', $filename); @@ -387,4 +433,4 @@ class HTMLPurifier_ConfigSchema { } } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/ContentSets.php b/lib/htmlpurifier/HTMLPurifier/ContentSets.php index de5c532e18..7baf7a3101 100644 --- a/lib/htmlpurifier/HTMLPurifier/ContentSets.php +++ b/lib/htmlpurifier/HTMLPurifier/ContentSets.php @@ -5,6 +5,9 @@ require_once 'HTMLPurifier/ChildDef.php'; require_once 
'HTMLPurifier/ChildDef/Empty.php'; require_once 'HTMLPurifier/ChildDef/Required.php'; require_once 'HTMLPurifier/ChildDef/Optional.php'; +require_once 'HTMLPurifier/ChildDef/Custom.php'; + +// NOT UNIT TESTED!!! class HTMLPurifier_ContentSets { @@ -145,4 +148,3 @@ class HTMLPurifier_ContentSets } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Context.php b/lib/htmlpurifier/HTMLPurifier/Context.php index ce6fe51e05..a78a6fb6f6 100644 --- a/lib/htmlpurifier/HTMLPurifier/Context.php +++ b/lib/htmlpurifier/HTMLPurifier/Context.php @@ -2,6 +2,8 @@ /** * Registry object that contains information about the current context. + * @warning Is a bit buggy when variables are set to null: it thinks + * they don't exist! So use false instead, please. */ class HTMLPurifier_Context { @@ -19,7 +21,7 @@ class HTMLPurifier_Context */ function register($name, &$ref) { if (isset($this->_storage[$name])) { - trigger_error('Name collision, cannot re-register', + trigger_error("Name $name produces collision, cannot re-register", E_USER_ERROR); return; } @@ -29,11 +31,14 @@ class HTMLPurifier_Context /** * Retrieves a variable reference from the context. 
* @param $name String name + * @param $ignore_error Boolean whether or not to ignore error */ - function &get($name) { + function &get($name, $ignore_error = false) { if (!isset($this->_storage[$name])) { - trigger_error('Attempted to retrieve non-existent variable', - E_USER_ERROR); + if (!$ignore_error) { + trigger_error("Attempted to retrieve non-existent variable $name", + E_USER_ERROR); + } $var = null; // so we can return by reference return $var; } @@ -46,7 +51,7 @@ class HTMLPurifier_Context */ function destroy($name) { if (!isset($this->_storage[$name])) { - trigger_error('Attempted to destroy non-existent variable', + trigger_error("Attempted to destroy non-existent variable $name", E_USER_ERROR); return; } @@ -73,4 +78,3 @@ class HTMLPurifier_Context } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Definition.php b/lib/htmlpurifier/HTMLPurifier/Definition.php new file mode 100644 index 0000000000..8f958e4798 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Definition.php @@ -0,0 +1,40 @@ +setup) return; + $this->setup = true; + $this->doSetup($config); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php new file mode 100644 index 0000000000..d4c9d239f2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php @@ -0,0 +1,128 @@ +type = $type; + } + + /** + * Generates a unique identifier for a particular configuration + * @param Instance of HTMLPurifier_Config + */ + function generateKey($config) { + return $config->version . '-' . // possibly replace with function calls + $config->getBatchSerial($this->type) . '-' . + $config->get($this->type, 'DefinitionRev'); + } + + /** + * Tests whether or not a key is old with respect to the configuration's + * version and revision number. 
+ * @param $key Key to test + * @param $config Instance of HTMLPurifier_Config to test against + */ + function isOld($key, $config) { + if (substr_count($key, '-') < 2) return true; + list($version, $hash, $revision) = explode('-', $key, 3); + $compare = version_compare($version, $config->version); + // version mismatch, is always old + if ($compare != 0) return true; + // versions match, ids match, check revision number + if ( + $hash == $config->getBatchSerial($this->type) && + $revision < $config->get($this->type, 'DefinitionRev') + ) return true; + return false; + } + + /** + * Checks if a definition's type jives with the cache's type + * @note Throws an error on failure + * @param $def Definition object to check + * @return Boolean true if good, false if not + */ + function checkDefType($def) { + if ($def->type !== $this->type) { + trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}"); + return false; + } + return true; + } + + /** + * Adds a definition object to the cache + */ + function add($def, $config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Unconditionally saves a definition object to the cache + */ + function set($def, $config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Replace an object in the cache + */ + function replace($def, $config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Retrieves a definition object from the cache + */ + function get($config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Removes a definition object to the cache + */ + function remove($config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Clears all objects from cache + */ + function flush($config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } + + /** + * Clears all expired (older version or revision) objects from cache + */ + function 
cleanup($config) { + trigger_error('Cannot call abstract method', E_USER_ERROR); + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator.php new file mode 100644 index 0000000000..14fca85974 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator.php @@ -0,0 +1,59 @@ +copy(); + // reference is necessary for mocks in PHP 4 + $decorator->cache =& $cache; + $decorator->type = $cache->type; + return $decorator; + } + + /** + * Cross-compatible clone substitute + */ + function copy() { + return new HTMLPurifier_DefinitionCache_Decorator(); + } + + function add($def, $config) { + return $this->cache->add($def, $config); + } + + function set($def, $config) { + return $this->cache->set($def, $config); + } + + function replace($def, $config) { + return $this->cache->replace($def, $config); + } + + function get($config) { + return $this->cache->get($config); + } + + function flush($config) { + return $this->cache->flush($config); + } + + function cleanup($config) { + return $this->cache->cleanup($config); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php new file mode 100644 index 0000000000..eb47c433fb --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php @@ -0,0 +1,44 @@ +definitions[$this->generateKey($config)] = $def; + return $status; + } + + function set($def, $config) { + $status = parent::set($def, $config); + if ($status) $this->definitions[$this->generateKey($config)] = $def; + return $status; + } + + function replace($def, $config) { + $status = parent::replace($def, $config); + if ($status) $this->definitions[$this->generateKey($config)] = $def; + return $status; + } + + function get($config) { + $key = $this->generateKey($config); + if (isset($this->definitions[$key])) return $this->definitions[$key]; + 
$this->definitions[$key] = parent::get($config); + return $this->definitions[$key]; + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in new file mode 100644 index 0000000000..62235e225d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in @@ -0,0 +1,46 @@ + + Absolute path with no trailing slash to store serialized definitions in. + Default is within the + HTML Purifier library inside DefinitionCache/Serializer. This + path must be writable by the webserver. This directive has been + available since 2.0.0. +

+'); + +class HTMLPurifier_DefinitionCache_Serializer extends + HTMLPurifier_DefinitionCache +{ + + function add($def, $config) { + if (!$this->checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (file_exists($file)) return false; + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + function set($def, $config) { + if (!$this->checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + function replace($def, $config) { + if (!$this->checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + function get($config) { + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + return unserialize(file_get_contents($file)); + } + + function remove($config) { + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + return unlink($file); + } + + function flush($config) { + if (!$this->_prepareDir($config)) return false; + $dir = $this->generateDirectoryPath($config); + $dh = opendir($dir); + while (false !== ($filename = readdir($dh))) { + if (empty($filename)) continue; + if ($filename[0] === '.') continue; + unlink($dir . '/' . $filename); + } + } + + function cleanup($config) { + if (!$this->_prepareDir($config)) return false; + $dir = $this->generateDirectoryPath($config); + $dh = opendir($dir); + while (false !== ($filename = readdir($dh))) { + if (empty($filename)) continue; + if ($filename[0] === '.') continue; + $key = substr($filename, 0, strlen($filename) - 4); + if ($this->isOld($key, $config)) unlink($dir . '/' . 
$filename); + } + } + + /** + * Generates the file path to the serial file corresponding to + * the configuration and definition name + */ + function generateFilePath($config) { + $key = $this->generateKey($config); + return $this->generateDirectoryPath($config) . '/' . $key . '.ser'; + } + + /** + * Generates the path to the directory contain this cache's serial files + * @note No trailing slash + */ + function generateDirectoryPath($config) { + $base = $this->generateBaseDirectoryPath($config); + return $base . '/' . $this->type; + } + + /** + * Generates path to base directory that contains all definition type + * serials + */ + function generateBaseDirectoryPath($config) { + $base = $config->get('Cache', 'SerializerPath'); + $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base; + return $base; + } + + /** + * Convenience wrapper function for file_put_contents + * @param $file File name to write to + * @param $data Data to write into file + * @return Number of bytes written if success, or false if failure. + */ + function _write($file, $data) { + static $file_put_contents; + if ($file_put_contents === null) { + $file_put_contents = function_exists('file_put_contents'); + } + if ($file_put_contents) { + return file_put_contents($file, $data); + } + $fh = fopen($file, 'w'); + if (!$fh) return false; + $status = fwrite($fh, $data); + fclose($fh); + return $status; + } + + /** + * Prepares the directory that this type stores the serials in + * @return True if successful + */ + function _prepareDir($config) { + $directory = $this->generateDirectoryPath($config); + if (!is_dir($directory)) { + $base = $this->generateBaseDirectoryPath($config); + if (!is_dir($base)) { + trigger_error('Base directory '.$base.' 
does not exist, + please create or change using %Cache.SerializerPath', + E_USER_ERROR); + return false; + } elseif (!$this->_testPermissions($base)) { + return false; + } + mkdir($directory); + } elseif (!$this->_testPermissions($directory)) { + return false; + } + return true; + } + + /** + * Tests permissions on a directory and throws out friendly + * error messages and attempts to chmod it itself if possible + */ + function _testPermissions($dir) { + // early abort, if it is writable, everything is hunky-dory + if (is_writable($dir)) return true; + if (!is_dir($dir)) { + // generally, you'll want to handle this beforehand + // so a more specific error message can be given + trigger_error('Directory '.$dir.' does not exist', + E_USER_ERROR); + return false; + } + if (function_exists('posix_getuid')) { + // POSIX system, we can give more specific advice + if (fileowner($dir) === posix_getuid()) { + // we can chmod it ourselves + chmod($dir, 0755); + return true; + } elseif (filegroup($dir) === posix_getgid()) { + $chmod = '775'; + } else { + // PHP's probably running as nobody, so we'll + // need to give global permissions + $chmod = '777'; + } + trigger_error('Directory '.$dir.' not writable, '. + 'please chmod to ' . $chmod, + E_USER_ERROR); + } else { + // generic error message + trigger_error('Directory '.$dir.' not writable, '. + 'please alter file permissions', + E_USER_ERROR); + } + return false; + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php new file mode 100644 index 0000000000..acc661828a --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php @@ -0,0 +1,94 @@ + array()); + var $decorators = array(); + + /** + * Initialize default decorators + */ + function setup() { + $this->addDecorator('Cleanup'); + } + + /** + * Retrieves an instance of global definition cache factory. 
+ * @static + */ + function &instance($prototype = null) { + static $instance; + if ($prototype !== null) { + $instance = $prototype; + } elseif ($instance === null || $prototype === true) { + $instance = new HTMLPurifier_DefinitionCacheFactory(); + $instance->setup(); + } + return $instance; + } + + /** + * Factory method that creates a cache object based on configuration + * @param $name Name of definitions handled by cache + * @param $config Instance of HTMLPurifier_Config + */ + function &create($type, $config) { + // only one implementation as for right now, $config will + // be used to determine implementation + $method = $config->get('Cache', 'DefinitionImpl'); + if ($method === null) { + $null = new HTMLPurifier_DefinitionCache_Null($type); + return $null; + } + if (!empty($this->caches[$method][$type])) { + return $this->caches[$method][$type]; + } + $cache = new HTMLPurifier_DefinitionCache_Serializer($type); + foreach ($this->decorators as $decorator) { + $new_cache = $decorator->decorate($cache); + // prevent infinite recursion in PHP 4 + unset($cache); + $cache = $new_cache; + } + $this->caches[$method][$type] = $cache; + return $this->caches[$method][$type]; + } + + /** + * Registers a decorator to add to all new cache objects + * @param + */ + function addDecorator($decorator) { + if (is_string($decorator)) { + $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; + $decorator = new $class; + } + $this->decorators[$decorator->name] = $decorator; + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Doctype.php b/lib/htmlpurifier/HTMLPurifier/Doctype.php new file mode 100644 index 0000000000..7afdcd74a2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Doctype.php @@ -0,0 +1,66 @@ +renderDoctype. + * If structure changes, please update that function. 
+ */ +class HTMLPurifier_Doctype +{ + /** + * Full name of doctype + */ + var $name; + + /** + * List of standard modules (string identifiers or literal objects) + * that this doctype uses + */ + var $modules = array(); + + /** + * List of modules to use for tidying up code + */ + var $tidyModules = array(); + + /** + * Is the language derived from XML (i.e. XHTML)? + */ + var $xml = true; + + /** + * List of aliases for this doctype + */ + var $aliases = array(); + + /** + * Public DTD identifier + */ + var $dtdPublic; + + /** + * System DTD identifier + */ + var $dtdSystem; + + function HTMLPurifier_Doctype($name = null, $xml = true, $modules = array(), + $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null + ) { + $this->name = $name; + $this->xml = $xml; + $this->modules = $modules; + $this->tidyModules = $tidyModules; + $this->aliases = $aliases; + $this->dtdPublic = $dtd_public; + $this->dtdSystem = $dtd_system; + } + + /** + * Clones the doctype, use before resolving modes and the like + */ + function copy() { + return unserialize(serialize($this)); + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/DoctypeRegistry.php b/lib/htmlpurifier/HTMLPurifier/DoctypeRegistry.php new file mode 100644 index 0000000000..e657b3da4b --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/DoctypeRegistry.php @@ -0,0 +1,124 @@ +doctypes[$doctype->name] =& $doctype; + $name = $doctype->name; + // hookup aliases + foreach ($doctype->aliases as $alias) { + if (isset($this->doctypes[$alias])) continue; + $this->aliases[$alias] = $name; + } + // remove old aliases + if (isset($this->aliases[$name])) unset($this->aliases[$name]); + return $doctype; + } + + /** + * Retrieves reference to a doctype of a certain name + * @note This function resolves aliases + * @note When possible, use the more fully-featured make() + * @param $doctype Name of doctype + * @return Reference to doctype object + */ + function &get($doctype) { + if 
(isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; + if (!isset($this->doctypes[$doctype])) { + trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR); + $anon = new HTMLPurifier_Doctype($doctype); + return $anon; + } + return $this->doctypes[$doctype]; + } + + /** + * Creates a doctype based on a configuration object, + * will perform initialization on the doctype + * @note Use this function to get a copy of doctype that config + * can hold on to (this is necessary in order to tell + * Generator whether or not the current document is XML + * based or not). + */ + function make($config) { + $original_doctype = $this->get($this->getDoctypeFromConfig($config)); + $doctype = $original_doctype->copy(); + return $doctype; + } + + /** + * Retrieves the doctype from the configuration object + */ + function getDoctypeFromConfig($config) { + // recommended test + $doctype = $config->get('HTML', 'Doctype'); + if (!empty($doctype)) return $doctype; + $doctype = $config->get('HTML', 'CustomDoctype'); + if (!empty($doctype)) return $doctype; + // backwards-compatibility + if ($config->get('HTML', 'XHTML')) { + $doctype = 'XHTML 1.0'; + } else { + $doctype = 'HTML 4.01'; + } + if ($config->get('HTML', 'Strict')) { + $doctype .= ' Strict'; + } else { + $doctype .= ' Transitional'; + } + return $doctype; + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/ElementDef.php b/lib/htmlpurifier/HTMLPurifier/ElementDef.php index 73c94abe13..21e1a5a764 100644 --- a/lib/htmlpurifier/HTMLPurifier/ElementDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php @@ -3,6 +3,8 @@ /** * Structure that stores an HTML element definition. Used by * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule. + * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition. + * Please update that class too. 
*/ class HTMLPurifier_ElementDef { @@ -51,6 +53,8 @@ class HTMLPurifier_ElementDef * Abstract string representation of internal ChildDef rules. See * HTMLPurifier_ContentSets for how this is parsed and then transformed * into an HTMLPurifier_ChildDef. + * @warning This is a temporary variable that is not available after + * being processed by HTMLDefinition * @public */ var $content_model; @@ -58,19 +62,15 @@ class HTMLPurifier_ElementDef /** * Value of $child->type, used to determine which ChildDef to use, * used in combination with $content_model. + * @warning This must be lowercase + * @warning This is a temporary variable that is not available after + * being processed by HTMLDefinition * @public */ var $content_model_type; - /** - * Lookup table of tags that close this tag. Used during parsing - * to make sure we don't attempt to nest unclosed tags. - * @public - */ - var $auto_close = array(); - /** * Does the element have a content model (#PCDATA | Inline)*? This * is important for chameleon ins and del processing in @@ -78,14 +78,47 @@ class HTMLPurifier_ElementDef * have to worry about this one. * @public */ - var $descendants_are_inline; + var $descendants_are_inline = false; + + /** + * List of the names of required attributes this element has. Dynamically + * populated. + * @public + */ + var $required_attr = array(); /** * Lookup table of tags excluded from all descendants of this tag. + * @note SGML permits exclusions for all descendants, but this is + * not possible with DTDs or XML Schemas. W3C has elected to + * use complicated compositions of content_models to simulate + * exclusion for children, but we go the simpler, SGML-style + * route of flat-out exclusions, which correctly apply to + * all descendants and not just children. Note that the XHTML + * Modularization Abstract Modules are blithely unaware of such + * distinctions. * @public */ var $excludes = array(); + /** + * Is this element safe for untrusted users to use? 
+ */ + var $safe; + + /** + * Low-level factory constructor for creating new standalone element defs + * @static + */ + function create($safe, $content_model, $content_model_type, $attr) { + $def = new HTMLPurifier_ElementDef(); + $def->safe = (bool) $safe; + $def->content_model = $content_model; + $def->content_model_type = $content_model_type; + $def->attr = $attr; + return $def; + } + /** * Merges the values of another element definition into this one. * Values from the new element def take precedence if a value is @@ -99,24 +132,56 @@ class HTMLPurifier_ElementDef // merge in the includes // sorry, no way to override an include foreach ($v as $v2) { - $def->attr[0][] = $v2; + $this->attr[0][] = $v2; } continue; } + if ($v === false) { + if (isset($this->attr[$k])) unset($this->attr[$k]); + continue; + } $this->attr[$k] = $v; } - foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v; - foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v; - foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v; - foreach($def->excludes as $k => $v) $this->excludes[$k] = $v; + $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre); + $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); + $this->_mergeAssocArray($this->excludes, $def->excludes); + if(!empty($def->content_model)) { + $this->content_model .= ' | ' . $def->content_model; + $this->child = false; + } + if(!empty($def->content_model_type)) { + $this->content_model_type = $def->content_model_type; + $this->child = false; + } if(!is_null($def->child)) $this->child = $def->child; - if(!empty($def->content_model)) $this->content_model .= ' | ' . 
$def->content_model; - if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type; - if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline; + if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; + if(!is_null($def->safe)) $this->safe = $def->safe; } + /** + * Merges one array into another, removes values which equal false + * @param $a1 Array by reference that is merged into + * @param $a2 Array that merges into $a1 + */ + function _mergeAssocArray(&$a1, $a2) { + foreach ($a2 as $k => $v) { + if ($v === false) { + if (isset($a1[$k])) unset($a1[$k]); + continue; + } + $a1[$k] = $v; + } + } + + /** + * Retrieves a copy of the element definition + */ + function copy() { + return unserialize(serialize($this)); + } + } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/Encoder.php b/lib/htmlpurifier/HTMLPurifier/Encoder.php index 1a22b4525c..e5adf83f59 100644 --- a/lib/htmlpurifier/HTMLPurifier/Encoder.php +++ b/lib/htmlpurifier/HTMLPurifier/Encoder.php @@ -1,7 +1,5 @@ \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/EntityLookup.php b/lib/htmlpurifier/HTMLPurifier/EntityLookup.php index f950cc2231..8204867be3 100644 --- a/lib/htmlpurifier/HTMLPurifier/EntityLookup.php +++ b/lib/htmlpurifier/HTMLPurifier/EntityLookup.php @@ -19,7 +19,7 @@ class HTMLPurifier_EntityLookup { */ function setup($file = false) { if (!$file) { - $file = dirname(__FILE__) . '/EntityLookup/entities.ser'; + $file = HTMLPURIFIER_PREFIX . 
'/HTMLPurifier/EntityLookup/entities.ser'; } $this->table = unserialize(file_get_contents($file)); } @@ -43,4 +43,3 @@ class HTMLPurifier_EntityLookup { } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/EntityParser.php b/lib/htmlpurifier/HTMLPurifier/EntityParser.php index 069c5ce17e..2547241350 100644 --- a/lib/htmlpurifier/HTMLPurifier/EntityParser.php +++ b/lib/htmlpurifier/HTMLPurifier/EntityParser.php @@ -24,8 +24,8 @@ class HTMLPurifier_EntityParser * @protected */ var $_substituteEntitiesRegex = -'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z]+));?/'; -// 1. hex 2. dec 3. string +'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/'; +// 1. hex 2. dec 3. string (XML style) /** @@ -97,7 +97,6 @@ class HTMLPurifier_EntityParser } else { if (isset($this->_special_ent2dec[$matches[3]])) return $entity; if (!$this->_entity_lookup) { - require_once 'HTMLPurifier/EntityLookup.php'; $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); } if (isset($this->_entity_lookup->table[$matches[3]])) { @@ -155,4 +154,3 @@ class HTMLPurifier_EntityParser } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Error.php b/lib/htmlpurifier/HTMLPurifier/Error.php index adc81dc56d..2ca4d7323c 100644 --- a/lib/htmlpurifier/HTMLPurifier/Error.php +++ b/lib/htmlpurifier/HTMLPurifier/Error.php @@ -5,4 +5,3 @@ */ class HTMLPurifier_Error {} -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ErrorCollector.php b/lib/htmlpurifier/HTMLPurifier/ErrorCollector.php new file mode 100644 index 0000000000..70ac5d9a00 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ErrorCollector.php @@ -0,0 +1,118 @@ +locale =& $context->get('Locale'); + $this->generator =& $context->get('Generator'); + $this->context =& $context; + } + + /** + * Sends an error message to the collector for later use + * @param $line Integer line number, or HTMLPurifier_Token that caused error + * @param $severity int Error 
severity, PHP error style (don't use E_USER_) + * @param $msg string Error message text + */ + function send($severity, $msg) { + + $args = array(); + if (func_num_args() > 2) { + $args = func_get_args(); + array_shift($args); + unset($args[0]); + } + + $token = $this->context->get('CurrentToken', true); + $line = $token ? $token->line : $this->context->get('CurrentLine', true); + $attr = $this->context->get('CurrentAttr', true); + + // perform special substitutions, also add custom parameters + $subst = array(); + if (!is_null($token)) { + $args['CurrentToken'] = $token; + } + if (!is_null($attr)) { + $subst['$CurrentAttr.Name'] = $attr; + if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; + } + + if (empty($args)) { + $msg = $this->locale->getMessage($msg); + } else { + $msg = $this->locale->formatMessage($msg, $args); + } + + if (!empty($subst)) $msg = strtr($msg, $subst); + + $this->errors[] = array($line, $severity, $msg); + } + + /** + * Retrieves raw error data for custom formatter to use + * @param List of arrays in format of array(Error message text, + * token that caused error, tokens surrounding token) + */ + function getRaw() { + return $this->errors; + } + + /** + * Default HTML formatting implementation for error messages + * @param $config Configuration array, vital for HTML output nature + */ + function getHTMLFormatted($config) { + $ret = array(); + + $errors = $this->errors; + + // sort error array by line + // line numbers are enabled if they aren't explicitly disabled + if ($config->get('Core', 'MaintainLineNumbers') !== false) { + $has_line = array(); + $lines = array(); + $original_order = array(); + foreach ($errors as $i => $error) { + $has_line[] = (int) (bool) $error[0]; + $lines[] = $error[0]; + $original_order[] = $i; + } + array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors); + } + + foreach ($errors as $error) { + list($line, $severity, $msg) = $error; + $string = 
''; + $string .= '' . $this->locale->getErrorName($severity) . ': '; + $string .= $this->generator->escape($msg); + if ($line) { + // have javascript link generation that causes + // textarea to skip to the specified line + $string .= $this->locale->formatMessage( + 'ErrorCollector: At line', array('line' => $line)); + } + $ret[] = $string; + } + + if (empty($errors)) { + return '

' . $this->locale->getMessage('ErrorCollector: No errors') . '

'; + } else { + return ''; + } + + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Filter.php b/lib/htmlpurifier/HTMLPurifier/Filter.php index 94c5ae7bb2..99cdeebaae 100644 --- a/lib/htmlpurifier/HTMLPurifier/Filter.php +++ b/lib/htmlpurifier/HTMLPurifier/Filter.php @@ -36,4 +36,3 @@ class HTMLPurifier_Filter } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php index 433f17cf47..4f63ad6d56 100644 --- a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php +++ b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php @@ -31,4 +31,3 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Generator.php b/lib/htmlpurifier/HTMLPurifier/Generator.php index b6a9aa24d7..5322b8c20b 100644 --- a/lib/htmlpurifier/HTMLPurifier/Generator.php +++ b/lib/htmlpurifier/HTMLPurifier/Generator.php @@ -1,58 +1,74 @@ Determines whether or not to run Tidy on the final output for pretty '. - 'formatting reasons, such as indentation and wrap.

This can greatly '. - 'improve readability for editors who are hand-editing the HTML, but is '. - 'by no means necessary as HTML Purifier has already fixed all major '. - 'errors the HTML may have had. Tidy is a non-default extension, and this directive '. - 'will silently fail if Tidy is not available.

If you are looking to make '. - 'the overall look of your page\'s source better, I recommend running Tidy '. - 'on the entire page rather than just user-content (after all, the '. - 'indentation relative to the containing blocks will be incorrect).

This '. - 'directive was available since 1.1.1.

' + 'Output', 'TidyFormat', false, 'bool', << + Determines whether or not to run Tidy on the final output for pretty + formatting reasons, such as indentation and wrap. +

+

+ This can greatly improve readability for editors who are hand-editing + the HTML, but is by no means necessary as HTML Purifier has already + fixed all major errors the HTML may have had. Tidy is a non-default + extension, and this directive will silently fail if Tidy is not + available. +

+

+ If you are looking to make the overall look of your page's source + better, I recommend running Tidy on the entire page rather than just + user-content (after all, the indentation relative to the containing + blocks will be incorrect). +

+

+ This directive was available since 1.1.1. +

+HTML ); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat'); + +HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', ' +

+ Newline string to format final output with. If left null, HTML Purifier + will auto-detect the default newline type of the system and use that; + you can manually override it here. Remember, \r\n is Windows, \r + is Mac, and \n is Unix. This directive was available since 2.0.1. +

+'); /** * Generates HTML from tokens. + * @todo Refactor interface so that configuration/context is determined + * upon instantiation, no need for messy generateFromTokens() calls */ class HTMLPurifier_Generator { /** - * Bool cache of %Core.CleanUTF8DuringGeneration + * Bool cache of %HTML.XHTML * @private */ - var $_clean_utf8 = false; + var $_xhtml = true; /** - * Bool cache of %Core.XHTML + * Bool cache of %Output.CommentScriptContents * @private */ - var $_xhtml = true; + var $_scriptFix = false; + + /** + * Cache of HTMLDefinition + * @private + */ + var $_def; /** * Generates HTML from an array of tokens. @@ -63,13 +79,28 @@ class HTMLPurifier_Generator function generateFromTokens($tokens, $config, &$context) { $html = ''; if (!$config) $config = HTMLPurifier_Config::createDefault(); - $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration'); - $this->_xhtml = $config->get('Core', 'XHTML'); + $this->_scriptFix = $config->get('Output', 'CommentScriptContents'); + + $this->_def = $config->getHTMLDefinition(); + $this->_xhtml = $this->_def->doctype->xml; + if (!$tokens) return ''; - foreach ($tokens as $token) { - $html .= $this->generateFromToken($token); + for ($i = 0, $size = count($tokens); $i < $size; $i++) { + if ($this->_scriptFix && $tokens[$i]->name === 'script' + && $i + 2 < $size && $tokens[$i+2]->type == 'end') { + // script special case + // the contents of the script block must be ONE token + // for this to work + $html .= $this->generateFromToken($tokens[$i++]); + $html .= $this->generateScriptFromToken($tokens[$i++]); + // We're not going to do this: it wouldn't be valid anyway + //while ($tokens[$i]->name != 'script') { + // $html .= $this->generateScriptFromToken($tokens[$i++]); + //} + } + $html .= $this->generateFromToken($tokens[$i]); } - if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) { + if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) { $tidy_options = array( 'indent'=> true, @@ 
-93,6 +124,10 @@ class HTMLPurifier_Generator $html = (string) $tidy; } } + // normalize newlines to system + $nl = $config->get('Output', 'Newline'); + if ($nl === null) $nl = PHP_EOL; + $html = str_replace("\n", $nl, $html); return $html; } @@ -104,14 +139,14 @@ class HTMLPurifier_Generator function generateFromToken($token) { if (!isset($token->type)) return ''; if ($token->type == 'start') { - $attr = $this->generateAttributes($token->attr); + $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token->type == 'end') { return 'name . '>'; } elseif ($token->type == 'empty') { - $attr = $this->generateAttributes($token->attr); + $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) . '>'; @@ -125,18 +160,35 @@ class HTMLPurifier_Generator } } + /** + * Special case processor for the contents of script tags + * @warning This runs into problems if there's already a literal + * --> somewhere inside the script contents. + */ + function generateScriptFromToken($token) { + if ($token->type != 'text') return $this->generateFromToken($token); + // return ''; + // more advanced version: + // thanks + $data = preg_replace('#//\s*$#', '', $token->data); + return ''; + } + /** * Generates attribute declarations from attribute array. * @param $assoc_array_of_attributes Attribute array * @return Generate HTML fragment for insertion. */ - function generateAttributes($assoc_array_of_attributes) { + function generateAttributes($assoc_array_of_attributes, $element) { $html = ''; foreach ($assoc_array_of_attributes as $key => $value) { if (!$this->_xhtml) { // remove namespaced attributes if (strpos($key, ':') !== false) continue; - // also needed: check for attribute minimization + if (!empty($this->_def->info[$element]->attr[$key]->minimized)) { + $html .= $key . 
' '; + continue; + } } $html .= $key.'="'.$this->escape($value).'" '; } @@ -149,10 +201,8 @@ class HTMLPurifier_Generator * @return String escaped data. */ function escape($string) { - if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string); return htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php index c1dd6535c4..aaeb8bae38 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -1,61 +1,133 @@ + Unique identifier for a custom-built HTML definition. If you edit + the raw version of the HTMLDefinition, introducing changes that the + configuration object does not reflect, you must specify this variable. + If you change your custom edits, you should change this directive, or + clear your cache. Example: +

+
+$config = HTMLPurifier_Config::createDefault();
+$config->set(\'HTML\', \'DefinitionID\', \'1\');
+$def = $config->getHTMLDefinition();
+$def->addAttribute(\'a\', \'tabindex\', \'Number\');
+
+

+ In the above example, the configuration is still at the defaults, but + using the advanced API, an extra attribute has been added. The + configuration object normally has no way of knowing that this change + has taken place, so it needs an extra directive: %HTML.DefinitionID. + If someone else attempts to use the default configuration, these two + pieces of code will not clobber each other in the cache, since one has + an extra directive attached to it. +

+

+ This directive has been available since 2.0.0, and in that version or + later you must specify a value for this directive to use the + advanced API features. +

+'); HTMLPurifier_ConfigSchema::define( - 'HTML', 'BlockWrapper', 'p', 'string', - 'String name of element to wrap inline elements that are inside a block '. - 'context. This only occurs in the children of blockquote in strict mode. '. - 'Example: by default value, <blockquote>Foo</blockquote> '. - 'would become <blockquote><p>Foo</p></blockquote>. The '. - '<p> tags can be replaced '. - 'with whatever you desire, as long as it is a block level element. '. - 'This directive has been available since 1.3.0.' -); + 'HTML', 'DefinitionRev', 1, 'int', ' +

+ Revision identifier for your custom definition specified in + %HTML.DefinitionID. This serves the same purpose: uniquely identifying + your custom definition, but this one does so in a chronological + context: revision 3 is more up-to-date than revision 2. Thus, when + this gets incremented, the cache handling is smart enough to clean + up any older revisions of your definition as well as flush the + cache. This directive has been available since 2.0.0. +

+'); HTMLPurifier_ConfigSchema::define( - 'HTML', 'Parent', 'div', 'string', - 'String name of element that HTML fragment passed to library will be '. - 'inserted in. An interesting variation would be using span as the '. - 'parent element, meaning that only inline tags would be allowed. '. - 'This directive has been available since 1.3.0.' -); + 'HTML', 'BlockWrapper', 'p', 'string', ' +

+ String name of element to wrap inline elements that are inside a block + context. This only occurs in the children of blockquote in strict mode. +

+

+ Example: by default value, + <blockquote>Foo</blockquote> would become + <blockquote><p>Foo</p></blockquote>. + The <p> tags can be replaced with whatever you desire, + as long as it is a block level element. This directive has been available + since 1.3.0. +

+'); HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedElements', null, 'lookup/null', - 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. - 'can overload it with your own list of tags to allow. Note that this '. - 'method is subtractive: it does its job by taking away from HTML Purifier '. - 'usual feature set, so you cannot add a tag that HTML Purifier never '. - 'supported in the first place (like embed, form or head). If you change this, you '. - 'probably also want to change %HTML.AllowedAttributes. '. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. '. - 'This directive has been available since 1.3.0.' -); + 'HTML', 'Parent', 'div', 'string', ' +

+ String name of element that HTML fragment passed to library will be + inserted in. An interesting variation would be using span as the + parent element, meaning that only inline tags would be allowed. + This directive has been available since 1.3.0. +

+'); HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedAttributes', null, 'lookup/null', - 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. - 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. - '(style, id, class, dir, lang, xml:lang).'. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. For '. - 'example, %HTML.EnableAttrID will take precedence over *.id in this '. - 'directive. You must set that directive to true before you can use '. - 'IDs at all. This directive has been available since 1.3.0.' -); + 'HTML', 'AllowedElements', null, 'lookup/null', ' +

+ If HTML Purifier\'s tag set is unsatisfactory for your needs, you + can overload it with your own list of tags to allow. Note that this + method is subtractive: it does its job by taking away from HTML Purifier\'s + usual feature set, so you cannot add a tag that HTML Purifier never + supported in the first place (like embed, form or head). If you + change this, you probably also want to change %HTML.AllowedAttributes. +

+

+ Warning: If another directive conflicts with the + elements here, that directive will win and override. + This directive has been available since 1.3.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', ' +

+ If HTML Purifier\'s attribute set is unsatisfactory, overload it! + The syntax is "tag.attr" or "*.attr" for the global attributes + (style, id, class, dir, lang, xml:lang). +

+

+ Warning: If another directive conflicts with the + elements here, that directive will win and override. For + example, %HTML.EnableAttrID will take precedence over *.id in this + directive. You must set that directive to true before you can use + IDs at all. This directive has been available since 1.3.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Allowed', null, 'itext/null', ' +

+ This is a convenience directive that rolls the functionality of + %HTML.AllowedElements and %HTML.AllowedAttributes into one directive. + Specify elements and attributes that are allowed using: + element1[attr1|attr2],element2.... You can also use + newlines instead of commas to separate elements. +

+

+ Warning: + All of the constraints on the component directives are still enforced. + The syntax is a subset of TinyMCE\'s valid_elements + whitelist: directly copy-pasting it here will probably result in + broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes + are set, this directive has no effect. + This directive has been available since 2.0.0. +

+'); /** * Definition of the purified HTML that describes allowed children, @@ -74,13 +146,13 @@ HTMLPurifier_ConfigSchema::define( * Purifier internals. Many of them, however, are public, and may be * edited by userspace code to tweak the behavior of HTMLDefinition. * - * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this - * rule: in the interest of comprehensiveness, it will sniff everything. + * @note This class is inspected by Printer_HTMLDefinition; please + * update that class if things here change. */ -class HTMLPurifier_HTMLDefinition +class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition { - /** FULLY-PUBLIC VARIABLES */ + // FULLY-PUBLIC VARIABLES --------------------------------------------- /** * Associative array of element names to HTMLPurifier_ElementDef @@ -139,50 +211,97 @@ class HTMLPurifier_HTMLDefinition */ var $info_content_sets = array(); + /** + * Doctype object + */ + var $doctype; + + + + // RAW CUSTOMIZATION STUFF -------------------------------------------- + + /** + * Adds a custom attribute to a pre-existing element + * @param $element_name String element name to add attribute to + * @param $attr_name String name of attribute + * @param $def Attribute definition, can be string or object, see + * HTMLPurifier_AttrTypes for details + */ + function addAttribute($element_name, $attr_name, $def) { + $module =& $this->getAnonymousModule(); + $element =& $module->addBlankElement($element_name); + $element->attr[$attr_name] = $def; + } + + /** + * Adds a custom element to your HTML definition + * @note See HTMLPurifier_HTMLModule::addElement for detailed + * parameter descriptions. + */ + function addElement($element_name, $type, $contents, $attr_collections, $attributes) { + $module =& $this->getAnonymousModule(); + // assume that if the user is calling this, the element + // is safe. 
This may not be a good idea + $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); + } + + /** + * Retrieves a reference to the anonymous module, so you can + * bust out advanced features without having to make your own + * module. + */ + function &getAnonymousModule() { + if (!$this->_anonModule) { + $this->_anonModule = new HTMLPurifier_HTMLModule(); + $this->_anonModule->name = 'Anonymous'; + } + return $this->_anonModule; + } + var $_anonModule; - /** PUBLIC BUT INTERNAL VARIABLES */ - var $setup = false; /**< Has setup() been called yet? */ - var $config; /**< Temporary instance of HTMLPurifier_Config */ + // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- + var $type = 'HTML'; var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ /** * Performs low-cost, preliminary initialization. - * @param $config Instance of HTMLPurifier_Config */ - function HTMLPurifier_HTMLDefinition(&$config) { - $this->config =& $config; + function HTMLPurifier_HTMLDefinition() { $this->manager = new HTMLPurifier_HTMLModuleManager(); } - /** - * Processes internals into form usable by HTMLPurifier internals. - * Modifying the definition after calling this function should not - * be done. 
- */ - function setup() { - - // multiple call guard - if ($this->setup) {return;} else {$this->setup = true;} - - $this->processModules(); - $this->setupConfigStuff(); - - unset($this->config); + function doSetup($config) { + $this->processModules($config); + $this->setupConfigStuff($config); unset($this->manager); + // cleanup some of the element definitions + foreach ($this->info as $k => $v) { + unset($this->info[$k]->content_model); + unset($this->info[$k]->content_model_type); + } } /** * Extract out the information from the manager */ - function processModules() { + function processModules($config) { + + if ($this->_anonModule) { + // for user specific changes + // this is late-loaded so we don't have to deal with PHP4 + // reference wonky-ness + $this->manager->addModule($this->_anonModule); + unset($this->_anonModule); + } - $this->manager->setup($this->config); + $this->manager->setup($config); + $this->doctype = $this->manager->doctype; - foreach ($this->manager->activeModules as $module) { + foreach ($this->manager->modules as $module) { foreach($module->info_tag_transform as $k => $v) { if ($v === false) unset($this->info_tag_transform[$k]); else $this->info_tag_transform[$k] = $v; @@ -197,7 +316,7 @@ class HTMLPurifier_HTMLDefinition } } - $this->info = $this->manager->getElements($this->config); + $this->info = $this->manager->getElements(); $this->info_content_sets = $this->manager->contentSets->lookup; } @@ -205,9 +324,9 @@ class HTMLPurifier_HTMLDefinition /** * Sets up stuff based on config. We need a better way of doing this. 
*/ - function setupConfigStuff() { + function setupConfigStuff($config) { - $block_wrapper = $this->config->get('HTML', 'BlockWrapper'); + $block_wrapper = $config->get('HTML', 'BlockWrapper'); if (isset($this->info_content_sets['Block'][$block_wrapper])) { $this->info_block_wrapper = $block_wrapper; } else { @@ -215,24 +334,33 @@ class HTMLPurifier_HTMLDefinition E_USER_ERROR); } - $parent = $this->config->get('HTML', 'Parent'); - $def = $this->manager->getElement($parent, $this->config); + $parent = $config->get('HTML', 'Parent'); + $def = $this->manager->getElement($parent, true); if ($def) { $this->info_parent = $parent; $this->info_parent_def = $def; } else { trigger_error('Cannot use unrecognized element as parent.', E_USER_ERROR); - $this->info_parent_def = $this->manager->getElement( - $this->info_parent, $this->config); + $this->info_parent_def = $this->manager->getElement($this->info_parent, true); } // support template text $support = "(for information on implementing this, see the ". 
"support forums) "; - // setup allowed elements, SubtractiveWhitelist module - $allowed_elements = $this->config->get('HTML', 'AllowedElements'); + // setup allowed elements + + $allowed_elements = $config->get('HTML', 'AllowedElements'); + $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + + if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { + $allowed = $config->get('HTML', 'Allowed'); + if (is_string($allowed)) { + list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); + } + } + if (is_array($allowed_elements)) { foreach ($this->info as $name => $d) { if(!isset($allowed_elements[$name])) unset($this->info[$name]); @@ -240,11 +368,11 @@ class HTMLPurifier_HTMLDefinition } // emit errors foreach ($allowed_elements as $element => $d) { + $element = htmlspecialchars($element); trigger_error("Element '$element' is not supported $support", E_USER_WARNING); } } - $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); $allowed_attributes_mutable = $allowed_attributes; // by copy! if (is_array($allowed_attributes)) { foreach ($this->info_global_attr as $attr_key => $info) { @@ -271,6 +399,8 @@ class HTMLPurifier_HTMLDefinition // emit errors foreach ($allowed_attributes_mutable as $elattr => $d) { list($element, $attribute) = explode('.', $elattr); + $element = htmlspecialchars($element); + $attribute = htmlspecialchars($attribute); if ($element == '*') { trigger_error("Global attribute '$attribute' is not ". "supported in any elements $support", @@ -284,7 +414,43 @@ class HTMLPurifier_HTMLDefinition } + /** + * Parses a TinyMCE-flavored Allowed Elements and Attributes list into + * separate lists for processing. Format is element[attr1|attr2],element2... 
+ * @warning Although it's largely drawn from TinyMCE's implementation, + * it is different, and you'll probably have to modify your lists + * @param $list String list to parse + * @param array($allowed_elements, $allowed_attributes) + */ + function parseTinyMCEAllowedList($list) { + + $elements = array(); + $attributes = array(); + + $chunks = preg_split('/(,|[\n\r]+)/', $list); + foreach ($chunks as $chunk) { + if (empty($chunk)) continue; + // remove TinyMCE element control characters + if (!strpos($chunk, '[')) { + $element = $chunk; + $attr = false; + } else { + list($element, $attr) = explode('[', $chunk); + } + if ($element !== '*') $elements[$element] = true; + if (!$attr) continue; + $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] + $attr = explode('|', $attr); + foreach ($attr as $key) { + $attributes["$element.$key"] = true; + } + } + + return array($elements, $attributes); + + } + } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php index 930b605d11..077daff88e 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php @@ -16,16 +16,14 @@ class HTMLPurifier_HTMLModule { + + // -- Overloadable ---------------------------------------------------- + /** * Short unique string identifier of the module */ var $name; - /** - * Dynamically set integer that specifies when the module was loaded in. - */ - var $order; - /** * Informally, a list of elements this module changes. Not used in * any significant way. @@ -99,27 +97,127 @@ class HTMLPurifier_HTMLModule */ function getChildDef($def) {return false;} + // -- Convenience ----------------------------------------------------- + + /** + * Convenience function that sets up a new element + * @param $element Name of element to add + * @param $safe Is element safe for untrusted users to use? + * @param $type What content set should element be registered to? 
+ * Set as false to skip this step. + * @param $contents Allowed children in form of: + * "$content_model_type: $content_model" + * @param $attr_includes What attribute collections to register to + * element? + * @param $attr What unique attributes does the element define? + * @note See ElementDef for in-depth descriptions of these parameters. + * @return Reference to created element definition object, so you + * can set advanced parameters + * @protected + */ + function &addElement($element, $safe, $type, $contents, $attr_includes = array(), $attr = array()) { + $this->elements[] = $element; + // parse content_model + list($content_model_type, $content_model) = $this->parseContents($contents); + // merge in attribute inclusions + $this->mergeInAttrIncludes($attr, $attr_includes); + // add element to content sets + if ($type) $this->addElementToContentSet($element, $type); + // create element + $this->info[$element] = HTMLPurifier_ElementDef::create( + $safe, $content_model, $content_model_type, $attr + ); + // literal object $contents means direct child manipulation + if (!is_string($contents)) $this->info[$element]->child = $contents; + return $this->info[$element]; + } + + /** + * Convenience function that creates a totally blank, non-standalone + * element. + * @param $element Name of element to create + * @return Reference to created element + */ + function &addBlankElement($element) { + if (!isset($this->info[$element])) { + $this->elements[] = $element; + $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->standalone = false; + } else { + trigger_error("Definition for $element already exists in module, cannot redefine"); + } + return $this->info[$element]; + } + /** - * Hook method that lets module perform arbitrary operations on - * HTMLPurifier_HTMLDefinition before the module gets processed. 
- * @param $definition Reference to HTMLDefinition being setup + * Convenience function that registers an element to a content set + * @param Element to register + * @param Name content set (warning: case sensitive, usually upper-case + * first letter) + * @protected */ - function preProcess(&$definition) {} + function addElementToContentSet($element, $type) { + if (!isset($this->content_sets[$type])) $this->content_sets[$type] = ''; + else $this->content_sets[$type] .= ' | '; + $this->content_sets[$type] .= $element; + } /** - * Hook method that lets module perform arbitrary operations - * on HTMLPurifier_HTMLDefinition after the module gets processed. - * @param $definition Reference to HTMLDefinition being setup + * Convenience function that transforms single-string contents + * into separate content model and content model type + * @param $contents Allowed children in form of: + * "$content_model_type: $content_model" + * @note If contents is an object, an array of two nulls will be + * returned, and the callee needs to take the original $contents + * and use it directly. */ - function postProcess(&$definition) {} + function parseContents($contents) { + if (!is_string($contents)) return array(null, null); // defer + switch ($contents) { + // check for shorthand content model forms + case 'Empty': + return array('empty', ''); + case 'Inline': + return array('optional', 'Inline | #PCDATA'); + case 'Flow': + return array('optional', 'Flow | #PCDATA'); + } + list($content_model_type, $content_model) = explode(':', $contents); + $content_model_type = strtolower(trim($content_model_type)); + $content_model = trim($content_model); + return array($content_model_type, $content_model); + } /** - * Hook method that is called when a module gets registered to - * the definition. - * @param $definition Reference to HTMLDefinition being setup + * Convenience function that merges a list of attribute includes into + * an attribute array. 
+ * @param $attr Reference to attr array to modify + * @param $attr_includes Array of includes / string include to merge in */ - function setup(&$definition) {} + function mergeInAttrIncludes(&$attr, $attr_includes) { + if (!is_array($attr_includes)) { + if (empty($attr_includes)) $attr_includes = array(); + else $attr_includes = array($attr_includes); + } + $attr[0] = $attr_includes; + } + /** + * Convenience function that generates a lookup table with boolean + * true as value. + * @param $list List of values to turn into a lookup + * @note You can also pass an arbitrary number of arguments in + * place of the regular argument + * @return Lookup array equivalent of list + */ + function makeLookup($list) { + if (is_string($list)) $list = func_get_args(); + $ret = array(); + foreach ($list as $value) { + if (is_null($value)) continue; + $ret[$value] = true; + } + return $ret; + } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php index 6feae0050d..2d9dffb622 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php @@ -11,32 +11,23 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule { var $name = 'Bdo'; - var $elements = array('bdo'); - var $content_sets = array('Inline' => 'bdo'); var $attr_collections = array( 'I18N' => array('dir' => false) ); function HTMLPurifier_HTMLModule_Bdo() { - $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false); - $this->attr_collections['I18N']['dir'] = $dir; - $this->info['bdo'] = new HTMLPurifier_ElementDef(); - $this->info['bdo']->attr = array( - 0 => array('Core', 'Lang'), - 'dir' => $dir, // required - // The Abstract Module specification has the attribute - // inclusions wrong for bdo: bdo allows - // xml:lang too (and we'll toss in lang for good measure, - // though it is not allowed for XHTML 1.1, this will - // be managed with a global attribute 
transform) + $bdo =& $this->addElement( + 'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'), + array( + 'dir' => 'Enum#ltr,rtl', // required + // The Abstract Module specification has the attribute + // inclusions wrong for bdo: bdo allows Lang + ) ); - $this->info['bdo']->content_model = '#PCDATA | Inline'; - $this->info['bdo']->content_model_type = 'optional'; - // provides fallback behavior if dir's missing (dir is required) - $this->info['bdo']->attr_transform_post['required-dir'] = - new HTMLPurifier_AttrTransform_BdoDir(); + $bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir(); + + $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl'; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php index 8f17c2f0a3..7e291c4ac5 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -1,5 +1,7 @@ 'ID', 'title' => 'CDATA', ), - 'Lang' => array( - 'xml:lang' => false, // see constructor - ), + 'Lang' => array(), 'I18N' => array( 0 => array('Lang'), // proprietary, for xml:lang/lang ), @@ -22,10 +22,5 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 0 => array('Core', 'I18N') ) ); - - function HTMLPurifier_HTMLModule_CommonAttributes() { - $this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang(); - } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php index c3dc019700..37a53c337a 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php @@ -11,28 +11,24 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule { var $name = 'Edit'; - var $elements = array('del', 'ins'); - var $content_sets = array('Inline' => 
'del | ins'); function HTMLPurifier_HTMLModule_Edit() { - foreach ($this->elements as $element) { - $this->info[$element] = new HTMLPurifier_ElementDef(); - $this->info[$element]->attr = array( - 0 => array('Common'), - 'cite' => 'URI', - // 'datetime' => 'Datetime' // Datetime not implemented - ); - // Inline context ! Block context (exclamation mark is - // separator, see getChildDef for parsing) - $this->info[$element]->content_model = - '#PCDATA | Inline ! #PCDATA | Flow'; - // HTML 4.01 specifies that ins/del must not contain block - // elements when used in an inline context, chameleon is - // a complicated workaround to acheive this effect - $this->info[$element]->content_model_type = 'chameleon'; - } + $contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow'; + $attr = array( + 'cite' => 'URI', + // 'datetime' => 'Datetime', // not implemented + ); + $this->addElement('del', true, 'Inline', $contents, 'Common', $attr); + $this->addElement('ins', true, 'Inline', $contents, 'Common', $attr); } + // HTML 4.01 specifies that ins/del must not contain block + // elements when used in an inline context, chameleon is + // a complicated workaround to acheive this effect + + // Inline context ! 
Block context (exclamation mark is + // separator, see getChildDef for parsing) + var $defines_child_def = true; function getChildDef($def) { if ($def->content_model_type != 'chameleon') return false; @@ -42,4 +38,3 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php index baa20fd14b..74aa692998 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php @@ -10,27 +10,23 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule { var $name = 'Hypertext'; - var $elements = array('a'); - var $content_sets = array('Inline' => 'a'); function HTMLPurifier_HTMLModule_Hypertext() { - $this->info['a'] = new HTMLPurifier_ElementDef(); - $this->info['a']->attr = array( - 0 => array('Common'), - // 'accesskey' => 'Character', - // 'charset' => 'Charset', - 'href' => 'URI', - //'hreflang' => 'LanguageCode', - 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), - 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), - //'tabindex' => 'Number', - //'type' => 'ContentType', + $a =& $this->addElement( + 'a', true, 'Inline', 'Inline', 'Common', + array( + // 'accesskey' => 'Character', + // 'charset' => 'Charset', + 'href' => 'URI', + // 'hreflang' => 'LanguageCode', + 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), + 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), + // 'tabindex' => 'Number', + // 'type' => 'ContentType', + ) ); - $this->info['a']->content_model = '#PCDATA | Inline'; - $this->info['a']->content_model_type = 'optional'; - $this->info['a']->excludes = array('a' => true); + $a->excludes = array('a' => true); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php index 
bf234b1372..64ce2a09a6 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php @@ -14,24 +14,23 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule { var $name = 'Image'; - var $elements = array('img'); - var $content_sets = array('Inline' => 'img'); function HTMLPurifier_HTMLModule_Image() { - $this->info['img'] = new HTMLPurifier_ElementDef(); - $this->info['img']->attr = array( - 0 => array('Common'), - 'alt' => 'Text', - 'height' => 'Length', - 'longdesc' => 'URI', - 'src' => new HTMLPurifier_AttrDef_URI(true), // embedded - 'width' => 'Length' + $img =& $this->addElement( + 'img', true, 'Inline', 'Empty', 'Common', + array( + 'alt*' => 'Text', + 'height' => 'Length', + 'longdesc' => 'URI', + 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded + 'width' => 'Length' + ) ); - $this->info['img']->content_model_type = 'empty'; - $this->info['img']->attr_transform_post[] = + // kind of strange, but splitting things up would be inefficient + $img->attr_transform_pre[] = + $img->attr_transform_post[] = new HTMLPurifier_AttrTransform_ImgRequired(); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php index a0613a2f7e..f702b58154 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php @@ -1,5 +1,7 @@ elements as $name) { - $this->info[$name] = new HTMLPurifier_ElementDef(); - // for u, s, strike, as more elements get added, add - // conditionals as necessary - $this->info[$name]->content_model = 'Inline | #PCDATA'; - $this->info[$name]->content_model_type = 'optional'; - $this->info[$name]->attr[0] = array('Common'); - } + + $this->addElement('basefont', true, 'Inline', 'Empty', false, array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + 'id' => 'ID' + 
)); + $this->addElement('center', true, 'Block', 'Flow', 'Common'); + $this->addElement('dir', true, 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + $this->addElement('font', true, 'Inline', 'Inline', array('Core', 'I18N'), array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + )); + $this->addElement('menu', true, 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + $this->addElement('s', true, 'Inline', 'Inline', 'Common'); + $this->addElement('strike', true, 'Inline', 'Inline', 'Common'); + $this->addElement('u', true, 'Inline', 'Inline', 'Common'); // setup modifications to old elements - foreach ($this->non_standalone_elements as $name) { - $this->info[$name] = new HTMLPurifier_ElementDef(); - $this->info[$name]->standalone = false; + + $align = 'Enum#left,right,center,justify'; + + $address =& $this->addBlankElement('address'); + $address->content_model = 'Inline | #PCDATA | p'; + $address->content_model_type = 'optional'; + $address->child = false; + + $blockquote =& $this->addBlankElement('blockquote'); + $blockquote->content_model = 'Flow | #PCDATA'; + $blockquote->content_model_type = 'optional'; + $blockquote->child = false; + + $br =& $this->addBlankElement('br'); + $br->attr['clear'] = 'Enum#left,all,right,none'; + + $caption =& $this->addBlankElement('caption'); + $caption->attr['align'] = 'Enum#top,bottom,left,right'; + + $div =& $this->addBlankElement('div'); + $div->attr['align'] = $align; + + $dl =& $this->addBlankElement('dl'); + $dl->attr['compact'] = 'Bool#compact'; + + for ($i = 1; $i <= 6; $i++) { + $h =& $this->addBlankElement("h$i"); + $h->attr['align'] = $align; } - $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer(); - $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + $hr =& $this->addBlankElement('hr'); + $hr->attr['align'] = $align; + $hr->attr['noshade'] = 'Bool#noshade'; + 
$hr->attr['size'] = 'Pixels'; + $hr->attr['width'] = 'Length'; + + $img =& $this->addBlankElement('img'); + $img->attr['align'] = 'Enum#top,middle,bottom,left,right'; + $img->attr['border'] = 'Pixels'; + $img->attr['hspace'] = 'Pixels'; + $img->attr['vspace'] = 'Pixels'; + + // figure out this integer business + + $li =& $this->addBlankElement('li'); + $li->attr['value'] = new HTMLPurifier_AttrDef_Integer(); + $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle'; + + $ol =& $this->addBlankElement('ol'); + $ol->attr['compact'] = 'Bool#compact'; + $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + $ol->attr['type'] = 'Enum#s:1,i,I,a,A'; + + $p =& $this->addBlankElement('p'); + $p->attr['align'] = $align; + + $pre =& $this->addBlankElement('pre'); + $pre->attr['width'] = 'Number'; + + // script omitted + + $table =& $this->addBlankElement('table'); + $table->attr['align'] = 'Enum#left,center,right'; + $table->attr['bgcolor'] = 'Color'; + + $tr =& $this->addBlankElement('tr'); + $tr->attr['bgcolor'] = 'Color'; + + $th =& $this->addBlankElement('th'); + $th->attr['bgcolor'] = 'Color'; + $th->attr['height'] = 'Length'; + $th->attr['nowrap'] = 'Bool#nowrap'; + $th->attr['width'] = 'Length'; - $this->info['address']->content_model = 'Inline | #PCDATA | p'; - $this->info['address']->content_model_type = 'optional'; - $this->info['address']->child = false; + $td =& $this->addBlankElement('td'); + $td->attr['bgcolor'] = 'Color'; + $td->attr['height'] = 'Length'; + $td->attr['nowrap'] = 'Bool#nowrap'; + $td->attr['width'] = 'Length'; - $this->info['blockquote']->content_model = 'Flow | #PCDATA'; - $this->info['blockquote']->content_model_type = 'optional'; - $this->info['blockquote']->child = false; + $ul =& $this->addBlankElement('ul'); + $ul->attr['compact'] = 'Bool#compact'; + $ul->attr['type'] = 'Enum#square,disc,circle'; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php 
b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php index f9f2c4e21f..dea99f36d4 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php @@ -9,7 +9,6 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule { var $name = 'List'; - var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li'); // According to the abstract schema, the List content set is a fully formed // one or more expr, but it invariably occurs in an optional declaration @@ -19,28 +18,19 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule // Furthermore, the actual XML Schema may disagree. Regardless, // we don't have support for such nested expressions without using // the incredibly inefficient and draconic Custom ChildDef. - var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List'); + + var $content_sets = array('Flow' => 'List'); function HTMLPurifier_HTMLModule_List() { - foreach ($this->elements as $element) { - $this->info[$element] = new HTMLPurifier_ElementDef(); - $this->info[$element]->attr = array(0 => array('Common')); - if ($element == 'li' || $element == 'dd') { - $this->info[$element]->content_model = '#PCDATA | Flow'; - $this->info[$element]->content_model_type = 'optional'; - } elseif ($element == 'ol' || $element == 'ul') { - $this->info[$element]->content_model = 'li'; - $this->info[$element]->content_model_type = 'required'; - } - } - $this->info['dt']->content_model = '#PCDATA | Inline'; - $this->info['dt']->content_model_type = 'optional'; - $this->info['dl']->content_model = 'dt | dd'; - $this->info['dl']->content_model_type = 'required'; - // this could be a LOT more robust - $this->info['li']->auto_close = array('li' => true); + $this->addElement('ol', true, 'List', 'Required: li', 'Common'); + $this->addElement('ul', true, 'List', 'Required: li', 'Common'); + $this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common'); + + $this->addElement('li', true, false, 
'Flow', 'Common'); + + $this->addElement('dd', true, false, 'Flow', 'Common'); + $this->addElement('dt', true, false, 'Inline', 'Common'); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php new file mode 100644 index 0000000000..6d20997567 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php @@ -0,0 +1,15 @@ + array( + 'lang' => 'LanguageCode', + ) + ); +} + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php index 5c80db407b..9e483dc153 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php @@ -16,25 +16,17 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule { var $name = 'Presentation'; - var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt'); - var $content_sets = array( - 'Block' => 'hr', - 'Inline' => 'b | big | i | small | sub | sup | tt' - ); function HTMLPurifier_HTMLModule_Presentation() { - foreach ($this->elements as $element) { - $this->info[$element] = new HTMLPurifier_ElementDef(); - $this->info[$element]->attr = array(0 => array('Common')); - if ($element == 'hr') { - $this->info[$element]->content_model_type = 'empty'; - } else { - $this->info[$element]->content_model = '#PCDATA | Inline'; - $this->info[$element]->content_model_type = 'optional'; - } - } + $this->addElement('b', true, 'Inline', 'Inline', 'Common'); + $this->addElement('big', true, 'Inline', 'Inline', 'Common'); + $this->addElement('hr', true, 'Block', 'Empty', 'Common'); + $this->addElement('i', true, 'Inline', 'Inline', 'Common'); + $this->addElement('small', true, 'Inline', 'Inline', 'Common'); + $this->addElement('sub', true, 'Inline', 'Inline', 'Common'); + $this->addElement('sup', true, 'Inline', 'Inline', 
'Common'); + $this->addElement('tt', true, 'Inline', 'Inline', 'Common'); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Ruby.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Ruby.php new file mode 100644 index 0000000000..f54324468d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Ruby.php @@ -0,0 +1,28 @@ +addElement('ruby', true, 'Inline', + 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))', + 'Common'); + $this->addElement('rbc', true, false, 'Required: rb', 'Common'); + $this->addElement('rtc', true, false, 'Required: rt', 'Common'); + $rb =& $this->addElement('rb', true, false, 'Inline', 'Common'); + $rb->excludes = array('ruby' => true); + $rt =& $this->addElement('rt', true, false, 'Inline', 'Common', array('rbspan' => 'Number')); + $rt->excludes = array('ruby' => true); + $this->addElement('rp', true, false, 'Optional: #PCDATA', 'Common'); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php index e3ef802bf4..d9f9db1a4d 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php @@ -5,14 +5,6 @@ WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!! 
-Usage: - -require_once 'HTMLPurifier/HTMLModule/Scripting.php'; -$def =& $config->getHTMLDefinition(true); // get the raw version -$def->manager->addModule('Scripting'); - -This must come before any other calls to getHTMLDefinition() - */ /** @@ -46,8 +38,12 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule // blockquote's custom definition (we would use it but // blockquote's contents are optional while noscript's contents // are required) + + // TODO: convert this to new syntax, main problem is getting + // both content sets working foreach ($this->elements as $element) { $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->safe = false; } $this->info['noscript']->attr = array( 0 => array('Common') ); $this->info['noscript']->content_model = 'Heading | List | Block'; @@ -59,9 +55,9 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule ); $this->info['script']->content_model = '#PCDATA'; $this->info['script']->content_model_type = 'optional'; + $this->info['script']->attr_transform_pre['type'] = $this->info['script']->attr_transform_post['type'] = new HTMLPurifier_AttrTransform_ScriptRequired(); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php index 5ee5d1cf65..d121d7405b 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php @@ -24,4 +24,3 @@ class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php index 003ff62487..2b2d41ce28 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php @@ -10,78 +10,59 @@ class HTMLPurifier_HTMLModule_Tables extends 
HTMLPurifier_HTMLModule { var $name = 'Tables'; - var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col', - 'colgroup', 'tbody', 'thead', 'tfoot'); - var $content_sets = array('Block' => 'table'); function HTMLPurifier_HTMLModule_Tables() { - foreach ($this->elements as $e) { - $this->info[$e] = new HTMLPurifier_ElementDef(); - $this->info[$e]->attr = array(0 => array('Common')); - $attr =& $this->info[$e]->attr; - if ($e == 'caption') continue; - if ($e == 'table'){ - $attr['border'] = 'Pixels'; - $attr['cellpadding'] = 'Length'; - $attr['cellspacing'] = 'Length'; - $attr['frame'] = new HTMLPurifier_AttrDef_Enum(array( - 'void', 'above', 'below', 'hsides', 'lhs', 'rhs', - 'vsides', 'box', 'border' - ), false); - $attr['rules'] = new HTMLPurifier_AttrDef_Enum(array( - 'none', 'groups', 'rows', 'cols', 'all' - ), false); - $attr['summary'] = 'Text'; - $attr['width'] = 'Length'; - continue; - } - if ($e == 'col' || $e == 'colgroup') { - $attr['span'] = 'Number'; - $attr['width'] = 'MultiLength'; - } - if ($e == 'td' || $e == 'th') { - $attr['abbr'] = 'Text'; - $attr['colspan'] = 'Number'; - $attr['rowspan'] = 'Number'; - } - $attr['align'] = new HTMLPurifier_AttrDef_Enum(array( - 'left', 'center', 'right', 'justify', 'char' - ), false); - $attr['valign'] = new HTMLPurifier_AttrDef_Enum(array( - 'top', 'middle', 'bottom', 'baseline' - ), false); - $attr['charoff'] = 'Length'; - } - $this->info['caption']->content_model = '#PCDATA | Inline'; - $this->info['caption']->content_model_type = 'optional'; - // Is done directly because it doesn't leverage substitution - // mechanisms. 
True model is: - // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))' - $this->info['table']->child = new HTMLPurifier_ChildDef_Table(); + $this->addElement('caption', true, false, 'Inline', 'Common'); - $this->info['td']->content_model = - $this->info['th']->content_model = '#PCDATA | Flow'; - $this->info['td']->content_model_type = - $this->info['th']->content_model_type = 'optional'; + $this->addElement('table', true, 'Block', + new HTMLPurifier_ChildDef_Table(), 'Common', + array( + 'border' => 'Pixels', + 'cellpadding' => 'Length', + 'cellspacing' => 'Length', + 'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border', + 'rules' => 'Enum#none,groups,rows,cols,all', + 'summary' => 'Text', + 'width' => 'Length' + ) + ); - $this->info['tr']->content_model = 'td | th'; - $this->info['tr']->content_model_type = 'required'; + // common attributes + $cell_align = array( + 'align' => 'Enum#left,center,right,justify,char', + 'charoff' => 'Length', + 'valign' => 'Enum#top,middle,bottom,baseline', + ); - $this->info['col']->content_model_type = 'empty'; + $cell_t = array_merge( + array( + 'abbr' => 'Text', + 'colspan' => 'Number', + 'rowspan' => 'Number', + ), + $cell_align + ); + $this->addElement('td', true, false, 'Flow', 'Common', $cell_t); + $this->addElement('th', true, false, 'Flow', 'Common', $cell_t); - $this->info['colgroup']->content_model = 'col'; - $this->info['colgroup']->content_model_type = 'optional'; + $this->addElement('tr', true, false, 'Required: td | th', 'Common', $cell_align); - $this->info['tbody']->content_model = - $this->info['thead']->content_model = - $this->info['tfoot']->content_model = 'tr'; - $this->info['tbody']->content_model_type = - $this->info['thead']->content_model_type = - $this->info['tfoot']->content_model_type = 'required'; + $cell_col = array_merge( + array( + 'span' => 'Number', + 'width' => 'MultiLength', + ), + $cell_align + ); + $this->addElement('col', true, false, 'Empty', 'Common', 
$cell_col); + $this->addElement('colgroup', true, false, 'Optional: col', 'Common', $cell_col); + + $this->addElement('tbody', true, false, 'Required: tr', 'Common', $cell_align); + $this->addElement('thead', true, false, 'Required: tr', 'Common', $cell_align); + $this->addElement('tfoot', true, false, 'Required: tr', 'Common', $cell_align); } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php index 1c2104bae8..57da9c3abe 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php @@ -9,13 +9,12 @@ class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule { var $name = 'Target'; - var $elements = array('a'); function HTMLPurifier_HTMLModule_Target() { - foreach ($this->elements as $e) { - $this->info[$e] = new HTMLPurifier_ElementDef(); - $this->info[$e]->standalone = false; - $this->info[$e]->attr = array( + $elements = array('a'); + foreach ($elements as $name) { + $e =& $this->addBlankElement($name); + $e->attr = array( 'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget() ); } @@ -23,4 +22,3 @@ class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php index 6f81dcf389..24a80dbc30 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php @@ -10,67 +10,62 @@ require_once 'HTMLPurifier/HTMLModule.php'; * - Block Structural (div, p) * - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var) * - Inline Structural (br, span) - * We have elected not to follow suite, but this may change. + * This module, functionally, does not distinguish between these + * sub-modules, but the code is internally structured to reflect + * these distinctions. 
*/ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule { var $name = 'Text'; - - var $elements = array('abbr', 'acronym', 'address', 'blockquote', - 'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3', - 'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong', - 'var', 'nolink', 'tex', 'algebra'); //moodle modification - var $content_sets = array( - 'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6', - 'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification - 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var', 'Flow' => 'Heading | Block | Inline' ); function HTMLPurifier_HTMLModule_Text() { - foreach ($this->elements as $element) { - $this->info[$element] = new HTMLPurifier_ElementDef(); - // attributes - if ($element == 'br') { - $this->info[$element]->attr = array(0 => array('Core')); - } elseif ($element == 'blockquote' || $element == 'q') { - $this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI'); - } else { - $this->info[$element]->attr = array(0 => array('Common')); - } - // content models - if ($element == 'br') { - $this->info[$element]->content_model_type = 'empty'; - } elseif ($element == 'blockquote') { - $this->info[$element]->content_model = 'Heading | Block | List'; - $this->info[$element]->content_model_type = 'optional'; - } elseif ($element == 'div') { - $this->info[$element]->content_model = '#PCDATA | Flow'; - $this->info[$element]->content_model_type = 'optional'; - } else { - $this->info[$element]->content_model = '#PCDATA | Inline'; - $this->info[$element]->content_model_type = 'optional'; - } - } - // SGML permits exclusions for all descendants, but this is - // not possible with DTDs or XML Schemas. W3C has elected to - // use complicated compositions of content_models to simulate - // exclusion for children, but we go the simpler, SGML-style - // route of flat-out exclusions. 
Note that the Abstract Module - // is blithely unaware of such distinctions. - $this->info['pre']->excludes = array_flip(array( - 'img', 'big', 'small', - 'object', 'applet', 'font', 'basefont' // generally not allowed - )); - $this->info['p']->auto_close = array_flip(array( - 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre', - 'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification - )); + + // Inline Phrasal ------------------------------------------------- + $this->addElement('abbr', true, 'Inline', 'Inline', 'Common'); + $this->addElement('acronym', true, 'Inline', 'Inline', 'Common'); + $this->addElement('cite', true, 'Inline', 'Inline', 'Common'); + $this->addElement('code', true, 'Inline', 'Inline', 'Common'); + $this->addElement('dfn', true, 'Inline', 'Inline', 'Common'); + $this->addElement('em', true, 'Inline', 'Inline', 'Common'); + $this->addElement('kbd', true, 'Inline', 'Inline', 'Common'); + $this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI')); + $this->addElement('samp', true, 'Inline', 'Inline', 'Common'); + $this->addElement('strong', true, 'Inline', 'Inline', 'Common'); + $this->addElement('var', true, 'Inline', 'Inline', 'Common'); + + // Inline Structural ---------------------------------------------- + $this->addElement('span', true, 'Inline', 'Inline', 'Common'); + $this->addElement('br', true, 'Inline', 'Empty', 'Core'); + + // Moodle specific elements - start + $this->addElement('nolink', true, 'Inline', 'Flow'); + $this->addElement('tex', true, 'Inline', 'Flow'); + $this->addElement('algebra', true, 'Inline', 'Flow'); + $this->addElement('lang', true, 'Inline', 'Flow', 'I18N'); + // Moodle specific elements - end + + // Block Phrasal -------------------------------------------------- + $this->addElement('address', true, 'Block', 'Inline', 'Common'); + $this->addElement('blockquote', true, 'Block', 'Optional: Heading | Block | List', 
'Common', array('cite' => 'URI') ); + $pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common'); + $pre->excludes = $this->makeLookup( + 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' ); + $this->addElement('h1', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h2', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h3', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h4', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h5', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h6', true, 'Heading', 'Inline', 'Common'); + + // Block Structural ----------------------------------------------- + $this->addElement('p', true, 'Block', 'Inline', 'Common'); + $this->addElement('div', true, 'Block', 'Flow', 'Common'); + } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy.php new file mode 100644 index 0000000000..411fd47bfe --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy.php @@ -0,0 +1,241 @@ +General level of cleanliness the Tidy module should enforce. +There are four allowed values:

+
+
none
+
No extra tidying should be done
+
light
+
Only fix elements that would be discarded otherwise due to + lack of support in doctype
+
medium
+
Enforce best practices
+
heavy
+
Transform all deprecated elements and attributes to standards + compliant equivalents
+
+

This directive has been available since 2.0.0

+' ); +HTMLPurifier_ConfigSchema::defineAllowedValues( + 'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy') +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'TidyAdd', array(), 'lookup', ' +Fixes to add to the default set of Tidy fixes as per your level. This +directive has been available since 2.0.0. +' ); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'TidyRemove', array(), 'lookup', ' +Fixes to remove from the default set of Tidy fixes as per your level. This +directive has been available since 2.0.0. +' ); + +/** + * Abstract class for a set of proprietary modules that clean up (tidy) + * poorly written HTML. + */ +class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule +{ + + /** + * List of supported levels. Index zero is a special case "no fixes" + * level. + */ + var $levels = array(0 => 'none', 'light', 'medium', 'heavy'); + + /** + * Default level to place all fixes in. Disabled by default + */ + var $defaultLevel = null; + + /** + * Lists of fixes used by getFixesForLevel(). Format is: + * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); + */ + var $fixesForLevel = array( + 'light' => array(), + 'medium' => array(), + 'heavy' => array() + ); + + /** + * Lazy load constructs the module by determining the necessary + * fixes to create and then delegating to the populate() function. + * @todo Wildcard matching and error reporting when an added or + * subtracted fix has no effect. 
+ */ + function construct($config) { + + // create fixes, initialize fixesForLevel + $fixes = $this->makeFixes(); + $this->makeFixesForLevel($fixes); + + // figure out which fixes to use + $level = $config->get('HTML', 'TidyLevel'); + $fixes_lookup = $this->getFixesForLevel($level); + + // get custom fix declarations: these need namespace processing + $add_fixes = $config->get('HTML', 'TidyAdd'); + $remove_fixes = $config->get('HTML', 'TidyRemove'); + + foreach ($fixes as $name => $fix) { + // needs to be refactored a little to implement globbing + if ( + isset($remove_fixes[$name]) || + (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name])) + ) { + unset($fixes[$name]); + } + } + + // populate this module with necessary fixes + $this->populate($fixes); + + } + + /** + * Retrieves all fixes per a level, returning fixes for that specific + * level as well as all levels below it. + * @param $level String level identifier, see $levels for valid values + * @return Lookup up table of fixes + */ + function getFixesForLevel($level) { + if ($level == $this->levels[0]) { + return array(); + } + $activated_levels = array(); + for ($i = 1, $c = count($this->levels); $i < $c; $i++) { + $activated_levels[] = $this->levels[$i]; + if ($this->levels[$i] == $level) break; + } + if ($i == $c) { + trigger_error( + 'Tidy level ' . htmlspecialchars($level) . ' not recognized', + E_USER_WARNING + ); + return array(); + } + $ret = array(); + foreach ($activated_levels as $level) { + foreach ($this->fixesForLevel[$level] as $fix) { + $ret[$fix] = true; + } + } + return $ret; + } + + /** + * Dynamically populates the $fixesForLevel member variable using + * the fixes array. It may be custom overloaded, used in conjunction + * with $defaultLevel, or not used at all. + */ + function makeFixesForLevel($fixes) { + if (!isset($this->defaultLevel)) return; + if (!isset($this->fixesForLevel[$this->defaultLevel])) { + trigger_error( + 'Default level ' . $this->defaultLevel . 
' does not exist', + E_USER_ERROR + ); + return; + } + $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes); + } + + /** + * Populates the module with transforms and other special-case code + * based on a list of fixes passed to it + * @param $lookup Lookup table of fixes to activate + */ + function populate($fixes) { + foreach ($fixes as $name => $fix) { + // determine what the fix is for + list($type, $params) = $this->getFixType($name); + switch ($type) { + case 'attr_transform_pre': + case 'attr_transform_post': + $attr = $params['attr']; + if (isset($params['element'])) { + $element = $params['element']; + if (empty($this->info[$element])) { + $e =& $this->addBlankElement($element); + } else { + $e =& $this->info[$element]; + } + } else { + $type = "info_$type"; + $e =& $this; + } + $f =& $e->$type; + $f[$attr] = $fix; + break; + case 'tag_transform': + $this->info_tag_transform[$params['element']] = $fix; + break; + case 'child': + case 'content_model_type': + $element = $params['element']; + if (empty($this->info[$element])) { + $e =& $this->addBlankElement($element); + } else { + $e =& $this->info[$element]; + } + $e->$type = $fix; + break; + default: + trigger_error("Fix type $type not supported", E_USER_ERROR); + break; + } + } + } + + /** + * Parses a fix name and determines what kind of fix it is, as well + * as other information defined by the fix + * @param $name String name of fix + * @return array(string $fix_type, array $fix_parameters) + * @note $fix_parameters is type dependant, see populate() for usage + * of these parameters + */ + function getFixType($name) { + // parse it + $property = $attr = null; + if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name); + if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name); + + // figure out the parameters + $params = array(); + if ($name !== '') $params['element'] = $name; + if (!is_null($attr)) $params['attr'] = $attr; + + // special case: 
attribute transform + if (!is_null($attr)) { + if (is_null($property)) $property = 'pre'; + $type = 'attr_transform_' . $property; + return array($type, $params); + } + + // special case: tag transform + if (is_null($property)) { + return array('tag_transform', $params); + } + + return array($property, $params); + + } + + /** + * Defines all fixes the module will perform in a compact + * associative array of fix name to fix implementation. + * @abstract + */ + function makeFixes() {} + +} + + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php new file mode 100644 index 0000000000..3b4b116024 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php @@ -0,0 +1,17 @@ + 'text-align:left;', + 'right' => 'text-align:right;', + 'top' => 'caption-side:top;', + 'bottom' => 'caption-side:bottom;' // not supported by IE + )); + + // @align for img ------------------------------------------------- + $r['img@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'right' => 'float:right;', + 'top' => 'vertical-align:top;', + 'middle' => 'vertical-align:middle;', + 'bottom' => 'vertical-align:baseline;', + )); + + // @align for table ----------------------------------------------- + $r['table@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'center' => 'margin-left:auto;margin-right:auto;', + 'right' => 'float:right;' + )); + + // @align for hr ----------------------------------------------- + $r['hr@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we use both text-align and margin because these work + // for different browsers (IE and Firefox, respectively) + // and the melange makes for a pretty cross-compatible + // solution + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', + 'center' => 
'margin-left:auto;margin-right:auto;text-align:center;', + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' + )); + + // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- + // {{{ + $align_lookup = array(); + $align_values = array('left', 'right', 'center', 'justify'); + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; + // }}} + $r['h1@align'] = + $r['h2@align'] = + $r['h3@align'] = + $r['h4@align'] = + $r['h5@align'] = + $r['h6@align'] = + $r['p@align'] = + $r['div@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); + + // @bgcolor for table, tr, td, th --------------------------------- + $r['table@bgcolor'] = + $r['td@bgcolor'] = + $r['th@bgcolor'] = + new HTMLPurifier_AttrTransform_BgColor(); + + // @border for img ------------------------------------------------ + $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); + + // @clear for br -------------------------------------------------- + $r['br@clear'] = + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( + 'left' => 'clear:left;', + 'right' => 'clear:right;', + 'all' => 'clear:both;', + 'none' => 'clear:none;', + )); + + // @height for td, th --------------------------------------------- + $r['td@height'] = + $r['th@height'] = + new HTMLPurifier_AttrTransform_Length('height'); + + // @hspace for img ------------------------------------------------ + $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); + + // @name for img, a ----------------------------------------------- + $r['img@name'] = + $r['a@name'] = new HTMLPurifier_AttrTransform_Name(); + + // @noshade for hr ------------------------------------------------ + // this transformation is not precise but often good enough. 
+ // different browsers use different styles to designate noshade + $r['hr@noshade'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'noshade', + 'color:#808080;background-color:#808080;border:0;' + ); + + // @nowrap for td, th --------------------------------------------- + $r['td@nowrap'] = + $r['th@nowrap'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'nowrap', + 'white-space:nowrap;' + ); + + // @size for hr -------------------------------------------------- + $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); + + // @type for li, ol, ul ------------------------------------------- + // {{{ + $ul_types = array( + 'disc' => 'list-style-type:disc;', + 'square' => 'list-style-type:square;', + 'circle' => 'list-style-type:circle;' + ); + $ol_types = array( + '1' => 'list-style-type:decimal;', + 'i' => 'list-style-type:lower-roman;', + 'I' => 'list-style-type:upper-roman;', + 'a' => 'list-style-type:lower-alpha;', + 'A' => 'list-style-type:upper-alpha;' + ); + $li_types = $ul_types + $ol_types; + // }}} + + $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); + $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); + $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); + + // @vspace for img ------------------------------------------------ + $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); + + // @width for hr, td, th ------------------------------------------ + $r['td@width'] = + $r['th@width'] = + $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); + + return $r; + + } + +} + +class HTMLPurifier_HTMLModule_Tidy_Transitional extends + HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + var $name = 'Tidy_Transitional'; + var $defaultLevel = 'heavy'; +} + +class HTMLPurifier_HTMLModule_Tidy_Strict extends + HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + var $name = 'Tidy_Strict'; + var $defaultLevel = 'light'; +} + diff --git 
a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php new file mode 100644 index 0000000000..b701491ecd --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php @@ -0,0 +1,26 @@ +content_model_type != 'strictblockquote') return false; + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php deleted file mode 100644 index 0b6c8370ab..0000000000 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php +++ /dev/null @@ -1,200 +0,0 @@ - false, - 'menu' => false, - 'dir' => false, - 'center'=> false - ); - - var $attr_collections = array( - 'Lang' => array( - 'lang' => false // placeholder - ) - ); - - var $info_attr_transform_post = array( - 'lang' => false // placeholder - ); - - function HTMLPurifier_HTMLModule_TransformToStrict() { - - // behavior with transformations when there's another CSS property - // working on it is interesting: the CSS will *always* override - // the deprecated attribute, whereas an inline CSS declaration will - // override the corresponding declaration in, say, an external - // stylesheet. This behavior won't affect most people, but it - // does represent an operational difference we CANNOT fix. 
- - // deprecated tag transforms - $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); - $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); - - foreach ($this->elements as $name) { - $this->info[$name] = new HTMLPurifier_ElementDef(); - $this->info[$name]->standalone = false; - } - - // deprecated attribute transforms - - // align battery - $align_lookup = array(); - $align_values = array('left', 'right', 'center', 'justify'); - foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; - $this->info['h1']->attr_transform_pre['align'] = - $this->info['h2']->attr_transform_pre['align'] = - $this->info['h3']->attr_transform_pre['align'] = - $this->info['h4']->attr_transform_pre['align'] = - $this->info['h5']->attr_transform_pre['align'] = - $this->info['h6']->attr_transform_pre['align'] = - $this->info['p'] ->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); - - // xml:lang <=> lang mirroring, implement in TransformToStrict, - // this is overridden in TransformToXHTML11 - $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang(); - $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang(); - - // this should not be applied to XHTML 1.0 Transitional, ONLY - // XHTML 1.0 Strict. We may need three classes - $this->info['blockquote']->content_model_type = 'strictblockquote'; - $this->info['blockquote']->child = false; // recalculate please! 
- - $this->info['table']->attr_transform_pre['bgcolor'] = - $this->info['tr']->attr_transform_pre['bgcolor'] = - $this->info['td']->attr_transform_pre['bgcolor'] = - $this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor(); - - $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border(); - - $this->info['img']->attr_transform_pre['name'] = - $this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name(); - - $this->info['td']->attr_transform_pre['width'] = - $this->info['th']->attr_transform_pre['width'] = - $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width'); - - $this->info['td']->attr_transform_pre['nowrap'] = - $this->info['th']->attr_transform_pre['nowrap'] = new HTMLPurifier_AttrTransform_BoolToCSS('nowrap', 'white-space:nowrap;'); - - $this->info['td']->attr_transform_pre['height'] = - $this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height'); - - $this->info['img']->attr_transform_pre['hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); - $this->info['img']->attr_transform_pre['vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); - - $this->info['hr']->attr_transform_pre['size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); - - // this transformation is not precise but often good enough. 
- // different browsers use different styles to designate noshade - $this->info['hr']->attr_transform_pre['noshade'] = new HTMLPurifier_AttrTransform_BoolToCSS('noshade', 'color:#808080;background-color:#808080;border: 0;'); - - $this->info['br']->attr_transform_pre['clear'] = - new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( - 'left' => 'clear:left;', - 'right' => 'clear:right;', - 'all' => 'clear:both;', - 'none' => 'clear:none;', - )); - - // this is a slightly unreasonable attribute - $this->info['caption']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - // we're following IE's behavior, not Firefox's, due - // to the fact that no one supports caption-side:right, - // W3C included (with CSS 2.1) - 'left' => 'text-align:left;', - 'right' => 'text-align:right;', - 'top' => 'caption-side:top;', - 'bottom' => 'caption-side:bottom;' // not supported by IE - )); - - $this->info['table']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - 'left' => 'float:left;', - 'center' => 'margin-left:auto;margin-right:auto;', - 'right' => 'float:right;' - )); - - $this->info['img']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - 'left' => 'float:left;', - 'right' => 'float:right;', - 'top' => 'vertical-align:top;', - 'middle' => 'vertical-align:middle;', - 'bottom' => 'vertical-align:baseline;', - )); - - $this->info['hr']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - 'left' => 'margin-left:0;margin-right:auto;text-align:left;', - 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', - 'right' => 'margin-left:auto;margin-right:0;text-align:right;' - )); - - $ul_types = array( - 'disc' => 'list-style-type:disc;', - 'square' => 'list-style-type:square;', - 'circle' => 'list-style-type:circle;' - ); - $ol_types = array( - '1' => 'list-style-type:decimal;', - 'i' => 
'list-style-type:lower-roman;', - 'I' => 'list-style-type:upper-roman;', - 'a' => 'list-style-type:lower-alpha;', - 'A' => 'list-style-type:upper-alpha;' - ); - $li_types = $ul_types + $ol_types; - - $this->info['ul']->attr_transform_pre['type'] = - new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); - $this->info['ol']->attr_transform_pre['type'] = - new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); - $this->info['li']->attr_transform_pre['type'] = - new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); - - - } - - var $defines_child_def = true; - function getChildDef($def) { - if ($def->content_model_type != 'strictblockquote') return false; - return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); - } - -} - -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php deleted file mode 100644 index 68aac61312..0000000000 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php +++ /dev/null @@ -1,36 +0,0 @@ - array( - 'lang' => false // remove it - ) - ); - - var $info_attr_transform_post = array( - 'lang' => false // remove it - ); - - function HTMLPurifier_HTMLModule_TransformToXHTML11() { - $this->info_attr_transform_pre['lang'] = new HTMLPurifier_AttrTransform_Lang(); - } - -} - -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php new file mode 100644 index 0000000000..67f7fc8ae1 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php @@ -0,0 +1,15 @@ + array( + 'xml:lang' => 'LanguageCode', + ) + ); +} + diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php index 81ef13a5f4..d4f10d0c7c 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php +++ 
b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php @@ -2,6 +2,8 @@ require_once 'HTMLPurifier/HTMLModule.php'; require_once 'HTMLPurifier/ElementDef.php'; +require_once 'HTMLPurifier/Doctype.php'; +require_once 'HTMLPurifier/DoctypeRegistry.php'; require_once 'HTMLPurifier/ContentSets.php'; require_once 'HTMLPurifier/AttrTypes.php'; @@ -23,188 +25,209 @@ require_once 'HTMLPurifier/HTMLModule/Image.php'; require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php'; require_once 'HTMLPurifier/HTMLModule/Target.php'; +require_once 'HTMLPurifier/HTMLModule/Scripting.php'; +require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; +require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; +require_once 'HTMLPurifier/HTMLModule/Ruby.php'; -// proprietary modules -require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; -require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php'; +// tidy modules +require_once 'HTMLPurifier/HTMLModule/Tidy.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php'; HTMLPurifier_ConfigSchema::define( - 'HTML', 'Doctype', null, 'string/null', - 'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '. - 'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '. + 'HTML', 'Doctype', '', 'string', + 'Doctype to use during filtering. '. 'Technically speaking this is not actually a doctype (as it does '. 'not identify a corresponding DTD), but we are using this name '. - 'for sake of simplicity. This will override any older directives '. - 'like %Core.XHTML or %HTML.Strict.' + 'for sake of simplicity. When non-blank, this will override any older directives '. + 'like %HTML.XHTML or %HTML.Strict.' 
); +HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array( + '', 'HTML 4.01 Transitional', 'HTML 4.01 Strict', + 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', + 'XHTML 1.1' +)); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'CustomDoctype', null, 'string/null', +' +A custom doctype for power-users who defined their own document +type. This directive only applies when %HTML.Doctype is blank. +This directive has been available since 2.0.1. +' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Trusted', false, 'bool', + 'Indicates whether or not the user input is trusted or not. If the '. + 'input is trusted, a more expansive set of allowed tags and attributes '. + 'will be used. This directive has been available since 2.0.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedModules', null, 'lookup/null', ' +

+ A doctype comes with a set of usual modules to use. Without having + to muck about with the doctypes, you can quickly activate or + disable these modules by specifying which modules you wish to allow + with this directive. This is most useful for unit testing specific + modules, although end users may find it useful for their own ends. +

+

+ If you specify a module that does not exist, the manager will silently + fail to use it, so be careful! User-defined modules are not affected + by this directive. Modules defined in %HTML.CoreModules are not + affected by this directive. This directive has been available since 2.0.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'CoreModules', array( + 'Structure' => true, + 'Text' => true, + 'Hypertext' => true, + 'List' => true, + 'NonXMLCommonAttributes' => true, + 'XMLCommonAttributes' => true, + 'CommonAttributes' => true + ), 'lookup', ' +

+ Certain modularized doctypes (XHTML, namely), have certain modules + that must be included for the doctype to be a conforming document + type: put those modules here. By default, XHTML\'s core modules + are used. You can set this to a blank array to disable core module + protection, but this is not recommended. This directive has been + available since 2.0.0. +

+'); class HTMLPurifier_HTMLModuleManager { /** - * Array of HTMLPurifier_Module instances, indexed by module's class name. - * All known modules, regardless of use, are in this array. + * Instance of HTMLPurifier_DoctypeRegistry + * @public */ - var $modules = array(); + var $doctypes; /** - * String doctype we will validate against. See $validModules for use. - * - * @note - * There is a special doctype '*' that acts both as the "default" - * doctype if a customized system only defines one doctype and - * also a catch-all doctype that gets merged into all the other - * module collections. When possible, use a private collection to - * share modules between doctypes: this special doctype is to - * make life more convenient for users. + * Instance of current doctype + * @public */ var $doctype; - var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */ /** - * Associative array: $collections[$type][$doctype] = list of modules. - * This is used to logically separate types of functionality so that - * based on the doctype and other configuration settings they may - * be easily switched and on and off. Custom setups may not need - * to use this abstraction, opting to have only one big collection - * with one valid doctype. + * Instance of HTMLPurifier_AttrTypes + * @public */ - var $collections = array(); + var $attrTypes; /** - * Modules that may be used in a valid doctype of this kind. - * Correctional and leniency modules should not be placed in this - * array unless the user said so: don't stuff every possible lenient - * module for this doctype in here. + * Active instances of modules for the specified doctype are + * indexed, by name, in this array. */ - var $validModules = array(); - var $validCollections = array(); /**< Collections to merge into $validModules */ + var $modules = array(); /** - * Modules that we will allow in input, subset of $validModules. Single - * element definitions may result in us consulting validModules. 
+ * Array of recognized HTMLPurifier_Module instances, indexed by + * module's class name. This array is usually lazy loaded, but a + * user can overload a module by pre-emptively registering it. */ - var $activeModules = array(); - var $activeCollections = array(); /**< Collections to merge into $activeModules */ - - var $counter = 0; /**< Designates next available integer order for modules. */ - var $initialized = false; /**< Says whether initialize() was called */ + var $registeredModules = array(); /** - * Specifies what doctype to siphon new modules from addModule() to, - * or false to disable the functionality. Must be used in conjunction - * with $autoCollection. + * List of extra modules that were added by the user using addModule(). + * These get unconditionally merged into the current doctype, whatever + * it may be. */ - var $autoDoctype = false; + var $userModules = array(); + /** - * Specifies what collection to siphon new modules from addModule() to, - * or false to disable the functionality. Must be used in conjunction - * with $autoCollection. + * Associative array of element name to list of modules that have + * definitions for the element; this array is dynamically filled. 
*/ - var $autoCollection = false; - - /** Associative array of element name to defining modules (always array) */ var $elementLookup = array(); - /** List of prefixes we should use for resolving small names */ + /** List of prefixes we should use for registering small names */ var $prefixes = array('HTMLPurifier_HTMLModule_'); - var $contentSets; /**< Instance of HTMLPurifier_ContentSets */ - var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */ + var $contentSets; /**< Instance of HTMLPurifier_ContentSets */ var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ - /** - * @param $blank If true, don't do any initializing - */ - function HTMLPurifier_HTMLModuleManager($blank = false) { + /** If set to true, unsafe elements and attributes will be allowed */ + var $trusted = false; + + function HTMLPurifier_HTMLModuleManager() { - // the only editable internal object. The rest need to - // be manipulated through modules + // editable internal objects $this->attrTypes = new HTMLPurifier_AttrTypes(); + $this->doctypes = new HTMLPurifier_DoctypeRegistry(); - if (!$blank) $this->initialize(); + // setup default HTML doctypes - } - - function initialize() { - $this->initialized = true; - - // load default modules to the recognized modules list (not active) - $modules = array( - // define - 'CommonAttributes', - 'Text', 'Hypertext', 'List', 'Presentation', - 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', - 'Target', - // define-redefine - 'Legacy', - // redefine - 'TransformToStrict', 'TransformToXHTML11' + // module reuse + $common = array( + 'CommonAttributes', 'Text', 'Hypertext', 'List', + 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', + 'StyleAttribute', 'Scripting' ); - foreach ($modules as $module) { - $this->addModule($module); - } - - // Safe modules for supported doctypes. 
These are included - // in the valid and active module lists by default - $this->collections['Safe'] = array( - '_Common' => array( // leading _ indicates private - 'CommonAttributes', 'Text', 'Hypertext', 'List', - 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', - 'StyleAttribute' - ), - // HTML definitions, defer to XHTML definitions - 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), - 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')), - // XHTML definitions - 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy', 'Target' ), - 'XHTML 1.0 Strict' => array(array('_Common')), - 'XHTML 1.1' => array(array('_Common')), + $transitional = array('Legacy', 'Target'); + $xml = array('XMLCommonAttributes'); + $non_xml = array('NonXMLCommonAttributes'); + + $this->doctypes->register( + 'HTML 4.01 Transitional', false, + array_merge($common, $transitional, $non_xml), + array('Tidy_Transitional', 'Tidy_Proprietary'), + array(), + '-//W3C//DTD HTML 4.01 Transitional//EN', + 'http://www.w3.org/TR/html4/loose.dtd' ); - // Modules that specify elements that are unsafe from untrusted - // third-parties. These should be registered in $validModules but - // almost never $activeModules unless you really know what you're - // doing. - $this->collections['Unsafe'] = array(); - - // Modules to import if lenient mode (attempt to convert everything - // to a valid representation) is on. These must not be in $validModules - // unless specified so. 
- $this->collections['Lenient'] = array( - 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')), - 'XHTML 1.0 Strict' => array('TransformToStrict'), - 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11') + $this->doctypes->register( + 'HTML 4.01 Strict', false, + array_merge($common, $non_xml), + array('Tidy_Strict', 'Tidy_Proprietary'), + array(), + '-//W3C//DTD HTML 4.01//EN', + 'http://www.w3.org/TR/html4/strict.dtd' ); - // Modules to import if correctional mode (correct everything that - // is feasible to strict mode) is on. These must not be in $validModules - // unless specified so. - $this->collections['Correctional'] = array( - 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), - 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one + $this->doctypes->register( + 'XHTML 1.0 Transitional', true, + array_merge($common, $transitional, $xml, $non_xml), + array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'), + array(), + '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' ); - // User-space modules, custom code or whatever - $this->collections['Extension'] = array(); - - // setup active versus valid modules. ORDER IS IMPORTANT! 
- // definition modules - $this->makeCollectionActive('Safe'); - $this->makeCollectionValid('Unsafe'); - // redefinition modules - $this->makeCollectionActive('Lenient'); - $this->makeCollectionActive('Correctional'); + $this->doctypes->register( + 'XHTML 1.0 Strict', true, + array_merge($common, $xml, $non_xml), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'), + array(), + '-//W3C//DTD XHTML 1.0 Strict//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' + ); - $this->autoDoctype = '*'; - $this->autoCollection = 'Extension'; + $this->doctypes->register( + 'XHTML 1.1', true, + array_merge($common, $xml, array('Ruby')), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1 + array(), + '-//W3C//DTD XHTML 1.1//EN', + 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' + ); } /** - * Adds a module to the recognized module list. This does not - * do anything else: the module must be added to a corresponding - * collection to be "activated". + * Registers a module to the recognized module list, useful for + * overloading pre-existing modules. * @param $module Mixed: string module name, with or without * HTMLPurifier_HTMLModule prefix, or instance of * subclass of HTMLPurifier_HTMLModule. @@ -217,10 +240,15 @@ class HTMLPurifier_HTMLModuleManager * - Check for literal object name * - Throw fatal error * If your object name collides with an internal class, specify - * your module manually. + * your module manually. All modules must have been included + * externally: registerModule will not perform inclusions for you! + * @warning If your module has the same name as an already loaded + * module, your module will overload the old one WITHOUT + * warning. 
*/ - function addModule($module) { + function registerModule($module) { if (is_string($module)) { + // attempt to load the module $original_module = $module; $ok = false; foreach ($this->prefixes as $prefix) { @@ -240,16 +268,19 @@ class HTMLPurifier_HTMLModuleManager } $module = new $module(); } - $module->order = $this->counter++; // assign then increment - $this->modules[$module->name] = $module; - if ($this->autoDoctype !== false && $this->autoCollection !== false) { - $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name; + if (empty($module->name)) { + trigger_error('Module instance of ' . get_class($module) . ' must have name'); + return; } + $this->registeredModules[$module->name] = $module; } /** * Safely tests for class existence without invoking __autoload in PHP5 + * or greater. * @param $name String class name to test + * @note If any other class needs it, we'll need to stash in a + * conjectured "compatibility" class * @private */ function _classExists($name) { @@ -265,55 +296,63 @@ class HTMLPurifier_HTMLModuleManager } /** - * Makes a collection active, while also making it valid if not - * already done so. See $activeModules for the semantics of "active". - * @param $collection_name Name of collection to activate - */ - function makeCollectionActive($collection_name) { - if (!in_array($collection_name, $this->validCollections)) { - $this->makeCollectionValid($collection_name); - } - $this->activeCollections[] = $collection_name; - } - - /** - * Makes a collection valid. 
See $validModules for the semantics of "valid" + * Adds a module to the current doctype by first registering it, + * and then tacking it on to the active doctype */ - function makeCollectionValid($collection_name) { - $this->validCollections[] = $collection_name; + function addModule($module) { + $this->registerModule($module); + if (is_object($module)) $module = $module->name; + $this->userModules[] = $module; } /** - * Adds a class prefix that addModule() will use to resolve a + * Adds a class prefix that registerModule() will use to resolve a * string name to a concrete class */ function addPrefix($prefix) { - $this->prefixes[] = (string) $prefix; + $this->prefixes[] = $prefix; } + /** + * Performs processing on modules, after being called you may + * use getElement() and getElements() + * @param $config Instance of HTMLPurifier_Config + */ function setup($config) { - // load up the autocollection - if ($this->autoCollection !== false) { - $this->makeCollectionActive($this->autoCollection); - } + $this->trusted = $config->get('HTML', 'Trusted'); + + // generate + $this->doctype = $this->doctypes->make($config); + $modules = $this->doctype->modules; - // retrieve the doctype - $this->doctype = $this->getDoctype($config); - if (isset($this->doctypeAliases[$this->doctype])) { - $this->doctype = $this->doctypeAliases[$this->doctype]; + // take out the default modules that aren't allowed + $lookup = $config->get('HTML', 'AllowedModules'); + $special_cases = $config->get('HTML', 'CoreModules'); + + if (is_array($lookup)) { + foreach ($modules as $k => $m) { + if (isset($special_cases[$m])) continue; + if (!isset($lookup[$m])) unset($modules[$k]); + } } - // process module collections to module name => module instance form - foreach ($this->collections as $col_i => $x) { - $this->processCollections($this->collections[$col_i]); + // merge in custom modules + $modules = array_merge($modules, $this->userModules); + + foreach ($modules as $module) { + 
$this->processModule($module); } - $this->validModules = $this->assembleModules($this->validCollections); - $this->activeModules = $this->assembleModules($this->activeCollections); + foreach ($this->doctype->tidyModules as $module) { + $this->processModule($module); + if (method_exists($this->modules[$module], 'construct')) { + $this->modules[$module]->construct($config); + } + } // setup lookup table based on all valid modules - foreach ($this->validModules as $module) { + foreach ($this->modules as $module) { foreach ($module->info as $name => $def) { if (!isset($this->elementLookup[$name])) { $this->elementLookup[$name] = array(); @@ -324,214 +363,51 @@ class HTMLPurifier_HTMLModuleManager // note the different choice $this->contentSets = new HTMLPurifier_ContentSets( - // content models that contain non-allowed elements are - // harmless because RemoveForeignElements will ensure - // they never get in anyway, and there is usually no - // reason why you should want to restrict a content - // model beyond what is mandated by the doctype. - // Note, however, that this means redefinitions of - // content models can't be tossed in validModels willy-nilly: - // that stuff still is regulated by configuration. - $this->validModules + // content set assembly deals with all possible modules, + // not just ones deemed to be "safe" + $this->modules ); $this->attrCollections = new HTMLPurifier_AttrCollections( $this->attrTypes, - // only explicitly allowed modules are allowed to affect - // the global attribute collections. This mean's there's - // a distinction between loading the Bdo module, and the - // bdo element: Bdo will enable the dir attribute on all - // elements, while bdo will only define the bdo element, - // which will not have an editable directionality. 
This might - // catch people who are loading only elements by surprise, so - // we should consider loading an entire module if all the - // elements it defines are requested by the user, especially - // if it affects the global attribute collections. - $this->activeModules + // there is no way to directly disable a global attribute, + // but using AllowedAttributes or simply not including + // the module in your custom doctype should be sufficient + $this->modules ); - } /** - * Takes a list of collections and merges together all the defined - * modules for the current doctype from those collections. - * @param $collections List of collection suffixes we should grab - * modules from (like 'Safe' or 'Lenient') + * Takes a module and adds it to the active module collection, + * registering it if necessary. */ - function assembleModules($collections) { - $modules = array(); - $numOfCollectionsUsed = 0; - foreach ($collections as $name) { - $disable_global = false; - if (!isset($this->collections[$name])) { - trigger_error("$name collection is undefined", E_USER_ERROR); - continue; - } - $cols = $this->collections[$name]; - if (isset($cols[$this->doctype])) { - if (isset($cols[$this->doctype]['*'])) { - unset($cols[$this->doctype]['*']); - $disable_global = true; - } - $modules += $cols[$this->doctype]; - $numOfCollectionsUsed++; - } - // accept catch-all doctype - if ( - $this->doctype !== '*' && - isset($cols['*']) && - !$disable_global - ) { - $modules += $cols['*']; - } + function processModule($module) { + if (!isset($this->registeredModules[$module]) || is_object($module)) { + $this->registerModule($module); } - - if ($numOfCollectionsUsed < 1) { - // possible XSS injection if user-specified doctypes - // are allowed - trigger_error("Doctype {$this->doctype} does not exist, ". - "check for typos (if you desire a doctype that allows ". 
- "no elements, use an empty array collection)", E_USER_ERROR); - } - return $modules; + $this->modules[$module] = $this->registeredModules[$module]; } /** - * Takes a collection and performs inclusions and substitutions for it. - * @param $cols Reference to collections class member variable + * Retrieves merged element definitions. + * @return Array of HTMLPurifier_ElementDef */ - function processCollections(&$cols) { - - // $cols is the set of collections - // $col_i is the name (index) of a collection - // $col is a collection/list of modules - - // perform inclusions - foreach ($cols as $col_i => $col) { - $seen = array(); - if (!empty($col[0]) && is_array($col[0])) { - $seen[$col_i] = true; // recursion reporting - $includes = $col[0]; - unset($cols[$col_i][0]); // remove inclusions value, recursion guard - } else { - $includes = array(); - } - if (empty($includes)) continue; - for ($i = 0; isset($includes[$i]); $i++) { - $inc = $includes[$i]; - if (isset($seen[$inc])) { - trigger_error( - "Circular inclusion detected in $col_i collection", - E_USER_ERROR - ); - continue; - } else { - $seen[$inc] = true; - } - if (!isset($cols[$inc])) { - trigger_error( - "Collection $col_i tried to include undefined ". - "collection $inc", E_USER_ERROR); - continue; - } - foreach ($cols[$inc] as $module) { - if (is_array($module)) { // another inclusion! - foreach ($module as $inc2) $includes[] = $inc2; - continue; - } - $cols[$col_i][] = $module; // merge in the other modules - } - } - } - - // replace with real modules, invert module from list to - // assoc array of module name to module instance - foreach ($cols as $col_i => $col) { - $ignore_global = false; - $order = array(); - foreach ($col as $module_i => $module) { - unset($cols[$col_i][$module_i]); - if (is_array($module)) { - trigger_error("Illegal inclusion array at index". - " $module_i found collection $col_i, inclusion". 
- " arrays must be at start of collection (index 0)", - E_USER_ERROR); - continue; - } - if ($module_i === '*' && $module === false) { - $ignore_global = true; - continue; - } - if (!isset($this->modules[$module])) { - trigger_error( - "Collection $col_i references undefined ". - "module $module", - E_USER_ERROR - ); - continue; - } - $module = $this->modules[$module]; - $cols[$col_i][$module->name] = $module; - $order[$module->name] = $module->order; - } - array_multisort( - $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i] - ); - if ($ignore_global) $cols[$col_i]['*'] = false; - } - - // delete pseudo-collections - foreach ($cols as $col_i => $col) { - if ($col_i[0] == '_') unset($cols[$col_i]); - } - - } - - /** - * Retrieves the doctype from the configuration object - */ - function getDoctype($config) { - $doctype = $config->get('HTML', 'Doctype'); - if ($doctype !== null) { - return $doctype; - } - if (!$this->initialized) { - // don't do HTML-oriented backwards compatibility stuff - // use either the auto-doctype, or the catch-all doctype - return $this->autoDoctype ? $this->autoDoctype : '*'; - } - // this is backwards-compatibility stuff - if ($config->get('Core', 'XHTML')) { - $doctype = 'XHTML 1.0'; - } else { - $doctype = 'HTML 4.01'; - } - if ($config->get('HTML', 'Strict')) { - $doctype .= ' Strict'; - } else { - $doctype .= ' Transitional'; - } - return $doctype; - } - - /** - * Retrieves merged element definitions for all active elements. - * @note We may want to generate an elements array during setup - * and pass that on, because a specific combination of - * elements may trigger the loading of a module. - * @param $config Instance of HTMLPurifier_Config, for determining - * stray elements. 
- */ - function getElements($config) { + function getElements() { $elements = array(); - foreach ($this->activeModules as $module) { + foreach ($this->modules as $module) { foreach ($module->info as $name => $v) { if (isset($elements[$name])) continue; - $elements[$name] = $this->getElement($name, $config); + // if element is not safe, don't use it + if (!$this->trusted && ($v->safe === false)) continue; + $elements[$name] = $this->getElement($name); } } - // standalone elements now loaded + // remove dud elements, this happens when an element that + // appeared to be safe actually wasn't + foreach ($elements as $n => $v) { + if ($v === false) unset($elements[$n]); + } return $elements; @@ -540,13 +416,16 @@ class HTMLPurifier_HTMLModuleManager /** * Retrieves a single merged element definition * @param $name Name of element - * @param $config Instance of HTMLPurifier_Config, may not be necessary. + * @param $trusted Boolean trusted overriding parameter: set to true + * if you want the full version of an element + * @return Merged HTMLPurifier_ElementDef */ - function getElement($name, $config) { + function getElement($name, $trusted = null) { $def = false; + if ($trusted === null) $trusted = $this->trusted; - $modules = $this->validModules; + $modules = $this->modules; if (!isset($this->elementLookup[$name])) { return false; @@ -555,9 +434,23 @@ class HTMLPurifier_HTMLModuleManager foreach($this->elementLookup[$name] as $module_name) { $module = $modules[$module_name]; - $new_def = $module->info[$name]; + + // copy is used because, ideally speaking, the original + // definition should not be modified. Usually, this will + // make no difference, but for consistency's sake + $new_def = $module->info[$name]->copy(); + + // refuse to create/merge in a definition that is deemed unsafe + if (!$trusted && ($new_def->safe === false)) { + $def = false; + continue; + } if (!$def && $new_def->standalone) { + // element with unknown safety is not to be trusted. 
+ // however, a merge-in definition with undefined safety + // is fine + if (!$trusted && !$new_def->safe) continue; $def = $new_def; } elseif ($def) { $def->mergeIn($new_def); @@ -583,6 +476,13 @@ class HTMLPurifier_HTMLModuleManager $this->contentSets->generateChildDef($def, $module); } + + // add information on required attributes + foreach ($def->attr as $attr_name => $attr_def) { + if ($attr_def->required) { + $def->required_attr[] = $attr_name; + } + } return $def; @@ -590,4 +490,4 @@ class HTMLPurifier_HTMLModuleManager } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php index 40ff2384bb..525c9aa080 100644 --- a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php +++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php @@ -39,4 +39,3 @@ class HTMLPurifier_IDAccumulator } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Injector.php b/lib/htmlpurifier/HTMLPurifier/Injector.php new file mode 100644 index 0000000000..5901716387 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Injector.php @@ -0,0 +1,111 @@ +inputTokens that indicates what token + * is currently being processed. + */ + var $inputIndex; + + /** + * Array of elements and attributes this injector creates and therefore + * need to be allowed by the definition. Takes form of + * array('element' => array('attr', 'attr2'), 'element2') + */ + var $needed = array(); + + /** + * Prepares the injector by giving it the config and context objects: + * this allows references to important variables to be made within + * the injector. This function also checks if the HTML environment + * will work with the Injector: if p tags are not allowed, the + * Auto-Paragraphing injector should not be enabled. 
+ * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Boolean false if success, string of missing needed element/attribute if failure + */ + function prepare($config, &$context) { + $this->htmlDefinition = $config->getHTMLDefinition(); + // perform $needed checks + foreach ($this->needed as $element => $attributes) { + if (is_int($element)) $element = $attributes; + if (!isset($this->htmlDefinition->info[$element])) return $element; + if (!is_array($attributes)) continue; + foreach ($attributes as $name) { + if (!isset($this->htmlDefinition->info[$element]->attr[$name])) return "$element.$name"; + } + } + $this->currentNesting =& $context->get('CurrentNesting'); + $this->inputTokens =& $context->get('InputTokens'); + $this->inputIndex =& $context->get('InputIndex'); + return false; + } + + /** + * Tests if the context node allows a certain element + * @param $name Name of element to test for + * @return True if element is allowed, false if it is not + */ + function allowsElement($name) { + if (!empty($this->currentNesting)) { + $parent_token = array_pop($this->currentNesting); + $this->currentNesting[] = $parent_token; + $parent = $this->htmlDefinition->info[$parent_token->name]; + } else { + $parent = $this->htmlDefinition->info_parent_def; + } + if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { + return false; + } + return true; + } + + /** + * Handler that is called when a text token is processed + */ + function handleText(&$token) {} + + /** + * Handler that is called when a start or empty token is processed + */ + function handleElement(&$token) {} + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php new file mode 100644 index 0000000000..6e0a6a3ed5 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php @@ -0,0 +1,267 @@ + + This directive turns on 
auto-paragraphing, where double newlines are + converted into paragraphs whenever possible. Auto-paragraphing + applies when: +

+ +

+ p tags must be allowed for this directive to take effect. + We do not use br tags for paragraphing, as that is + semantically incorrect. +

+

+ This directive has been available since 2.0.1. +

+'); + +/** + * Injector that auto paragraphs text in the root node based on + * double-spacing. + */ +class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector +{ + + var $name = 'AutoParagraph'; + var $needed = array('p'); + + function _pStart() { + $par = new HTMLPurifier_Token_Start('p'); + $par->armor['MakeWellFormed_TagClosedError'] = true; + return $par; + } + + function handleText(&$token) { + $text = $token->data; + if (empty($this->currentNesting)) { + if (!$this->allowsElement('p')) return; + // case 1: we're in root node (and it allows paragraphs) + $token = array($this->_pStart()); + $this->_splitText($text, $token); + } elseif ($this->currentNesting[count($this->currentNesting)-1]->name == 'p') { + // case 2: we're in a paragraph + $token = array(); + $this->_splitText($text, $token); + } elseif ($this->allowsElement('p')) { + // case 3: we're in an element that allows paragraphs + if (strpos($text, "\n\n") !== false) { + // case 3.1: this text node has a double-newline + $token = array($this->_pStart()); + $this->_splitText($text, $token); + } else { + $ok = false; + // test if up-coming tokens are either block or have + // a double newline in them + for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { + if ($this->inputTokens[$i]->type == 'start'){ + if (!$this->_isInline($this->inputTokens[$i])) { + $ok = true; + } + break; + } + if ($this->inputTokens[$i]->type == 'end') break; + if ($this->inputTokens[$i]->type == 'text') { + if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { + $ok = true; + } + if (!$this->inputTokens[$i]->is_whitespace) break; + } + } + if ($ok) { + // case 3.2: this text node is next to another node + // that will start a paragraph + $token = array($this->_pStart(), $token); + } + } + } + + } + + function handleElement(&$token) { + // check if we're inside a tag already + if (!empty($this->currentNesting)) { + if ($this->allowsElement('p')) { + // special case: we're in an element 
that allows paragraphs + + // this token is already paragraph, abort + if ($token->name == 'p') return; + + // this token is a block level, abort + if (!$this->_isInline($token)) return; + + // check if this token is adjacent to the parent token + $prev = $this->inputTokens[$this->inputIndex - 1]; + if ($prev->type != 'start') { + // not adjacent, we can abort early + // add lead paragraph tag if our token is inline + // and the previous tag was an end paragraph + if ( + $prev->name == 'p' && $prev->type == 'end' && + $this->_isInline($token) + ) { + $token = array($this->_pStart(), $token); + } + return; + } + + // this token is the first child of the element that allows + // paragraph. We have to peek ahead and see whether or not + // there is anything inside that suggests that a paragraph + // will be needed + $ok = false; + // maintain a mini-nesting counter, this lets us bail out + // early if possible + $j = 1; // current nesting, one is due to parent (we recalculate current token) + for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) { + if ($this->inputTokens[$i]->type == 'start') $j++; + if ($this->inputTokens[$i]->type == 'end') $j--; + if ($this->inputTokens[$i]->type == 'text') { + if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { + $ok = true; + break; + } + } + if ($j <= 0) break; + } + if ($ok) { + $token = array($this->_pStart(), $token); + } + } + return; + } + + // check if the start tag counts as a "block" element + if (!$this->_isInline($token)) return; + + // append a paragraph tag before the token + $token = array($this->_pStart(), $token); + } + + /** + * Splits up a text in paragraph tokens and appends them + * to the result stream that will replace the original + * @param $data String text data that will be processed + * into paragraphs + * @param $result Reference to array of tokens that the + * tags will be appended onto + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of 
HTMLPurifier_Context + * @private + */ + function _splitText($data, &$result) { + $raw_paragraphs = explode("\n\n", $data); + + // remove empty paragraphs + $paragraphs = array(); + $needs_start = false; + $needs_end = false; + + $c = count($raw_paragraphs); + if ($c == 1) { + // there were no double-newlines, abort quickly + $result[] = new HTMLPurifier_Token_Text($data); + return; + } + + for ($i = 0; $i < $c; $i++) { + $par = $raw_paragraphs[$i]; + if (trim($par) !== '') { + $paragraphs[] = $par; + continue; + } + if ($i == 0 && empty($result)) { + // The empty result indicates that the AutoParagraph + // injector did not add any start paragraph tokens. + // The fact that the first paragraph is empty indicates + // that there was a double-newline at the start of the + // data. + // Combined together, this means that we are in a paragraph, + // and the newline means we should start a new one. + $result[] = new HTMLPurifier_Token_End('p'); + // However, the start token should only be added if + // there is more processing to be done (i.e. there are + // real paragraphs in here). If there are none, the + // next start paragraph tag will be handled by the + // next run-around the injector + $needs_start = true; + } elseif ($i + 1 == $c) { + // a double-paragraph at the end indicates that + // there is an overriding need to start a new paragraph + // for the next section. 
This has no effect until + // we've processed all of the other paragraphs though + $needs_end = true; + } + } + + // check if there are no "real" paragraphs to be processed + if (empty($paragraphs)) { + return; + } + + // add a start tag if an end tag was added while processing + // the raw paragraphs (that happens if there's a leading double + // newline) + if ($needs_start) $result[] = $this->_pStart(); + + // append the paragraphs onto the result + foreach ($paragraphs as $par) { + $result[] = new HTMLPurifier_Token_Text($par); + $result[] = new HTMLPurifier_Token_End('p'); + $result[] = $this->_pStart(); + } + + // remove trailing start token, if one is needed, it will + // be handled the next time this injector is called + array_pop($result); + + // check the outside to determine whether or not the + // end paragraph tag should be removed. It should be removed + // unless the next non-whitespace token is a paragraph + // or a block element. + $remove_paragraph_end = true; + + if (!$needs_end) { + // Start of the checks one after the current token's index + for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { + if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') { + $remove_paragraph_end = $this->_isInline($this->inputTokens[$i]); + } + // check if we can abort early (whitespace means we carry-on!) 
+ if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break; + // end tags will automatically be handled by MakeWellFormed, + // so we don't have to worry about them + if ($this->inputTokens[$i]->type == 'end') break; + } + } else { + $remove_paragraph_end = false; + } + + // check the outside to determine whether or not the + // end paragraph tag should be removed + if ($remove_paragraph_end) { + array_pop($result); + } + + } + + /** + * Returns true if passed token is inline (and, ergo, allowed in + * paragraph tags) + * @private + */ + function _isInline($token) { + return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/Linkify.php b/lib/htmlpurifier/HTMLPurifier/Injector/Linkify.php new file mode 100644 index 0000000000..bf7abfa977 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Injector/Linkify.php @@ -0,0 +1,56 @@ + + This directive turns on linkification, auto-linking http, ftp and + https URLs. a tags with the href attribute + must be allowed. This directive has been available since 2.0.1. +

+'); + +/** + * Injector that converts http, https and ftp text URLs to actual links. + */ +class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector +{ + + var $name = 'Linkify'; + var $needed = array('a' => array('href')); + + function handleText(&$token) { + if (!$this->allowsElement('a')) return; + + if (strpos($token->data, '://') === false) { + // our really quick heuristic failed, abort + // this may not work so well if we want to match things like + // "google.com", but then again, most people don't + return; + } + + // there is/are URL(s). Let's split the string: + // Note: this regex is extremely permissive + $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + + $token = array(); + + // $i = index + // $c = count + // $l = is link + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { + if (!$l) { + if ($bits[$i] === '') continue; + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + } else { + $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + $token[] = new HTMLPurifier_Token_End('a'); + } + } + + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/PurifierLinkify.php b/lib/htmlpurifier/HTMLPurifier/Injector/PurifierLinkify.php new file mode 100644 index 0000000000..a7686297c2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Injector/PurifierLinkify.php @@ -0,0 +1,65 @@ + + Internal auto-formatter that converts configuration directives in + syntax %Namespace.Directive to links. a tags + with the href attribute must be allowed. + This directive has been available since 2.0.1. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'AutoFormatParam', 'PurifierLinkifyDocURL', '#%s', 'string', ' +

+ Location of configuration documentation to link to, let %s substitute + into the configuration\'s namespace and directive names sans the percent + sign. This directive has been available since 2.0.1. +

+'); + +/** + * Injector that converts configuration directive syntax %Namespace.Directive + * to links + */ +class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector +{ + + var $name = 'PurifierLinkify'; + var $docURL; + var $needed = array('a' => array('href')); + + function prepare($config, &$context) { + $this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL'); + return parent::prepare($config, $context); + } + + function handleText(&$token) { + if (!$this->allowsElement('a')) return; + if (strpos($token->data, '%') === false) return; + + $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $token = array(); + + // $i = index + // $c = count + // $l = is link + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { + if (!$l) { + if ($bits[$i] === '') continue; + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + } else { + $token[] = new HTMLPurifier_Token_Start('a', + array('href' => str_replace('%s', $bits[$i], $this->docURL))); + $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); + $token[] = new HTMLPurifier_Token_End('a'); + } + } + + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Language.php b/lib/htmlpurifier/HTMLPurifier/Language.php index ca6fe03138..c9a3c20fe2 100644 --- a/lib/htmlpurifier/HTMLPurifier/Language.php +++ b/lib/htmlpurifier/HTMLPurifier/Language.php @@ -20,12 +20,27 @@ class HTMLPurifier_Language */ var $messages = array(); + /** + * Array of localizable error codes + */ + var $errorNames = array(); + /** * Has the language object been loaded yet? 
* @private */ var $_loaded = false; + /** + * Instances of HTMLPurifier_Config and HTMLPurifier_Context + */ + var $config, $context; + + function HTMLPurifier_Language($config, &$context) { + $this->config = $config; + $this->context =& $context; + } + /** * Loads language object with necessary info from factory cache * @note This is a lazy loader @@ -41,16 +56,98 @@ class HTMLPurifier_Language } /** - * Retrieves a localised message. Does not perform any operations. + * Retrieves a localised message. * @param $key string identifier of message * @return string localised message */ function getMessage($key) { if (!$this->_loaded) $this->load(); - if (!isset($this->messages[$key])) return ''; + if (!isset($this->messages[$key])) return "[$key]"; return $this->messages[$key]; } + /** + * Retrieves a localised error name. + * @param $int integer error number, corresponding to PHP's error + * reporting + * @return string localised message + */ + function getErrorName($int) { + if (!$this->_loaded) $this->load(); + if (!isset($this->errorNames[$int])) return "[Error: $int]"; + return $this->errorNames[$int]; + } + + /** + * Converts an array list into a string readable representation + */ + function listify($array) { + $sep = $this->getMessage('Item separator'); + $sep_last = $this->getMessage('Item separator last'); + $ret = ''; + for ($i = 0, $c = count($array); $i < $c; $i++) { + if ($i == 0) { + } elseif ($i + 1 < $c) { + $ret .= $sep; + } else { + $ret .= $sep_last; + } + $ret .= $array[$i]; + } + return $ret; + } + + /** + * Formats a localised message with passed parameters + * @param $key string identifier of message + * @param $args Parameters to substitute in + * @return string localised message + * @todo Implement conditionals? 
Right now, some messages make + * reference to line numbers, but those aren't always available + */ + function formatMessage($key, $args = array()) { + if (!$this->_loaded) $this->load(); + if (!isset($this->messages[$key])) return "[$key]"; + $raw = $this->messages[$key]; + $subst = array(); + $generator = false; + foreach ($args as $i => $value) { + if (is_object($value)) { + if (is_a($value, 'HTMLPurifier_Token')) { + // factor this out some time + if (!$generator) $generator = $this->context->get('Generator'); + if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; + if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; + $subst['$'.$i.'.Compact'] = + $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); + // a more complex algorithm for compact representation + // could be introduced for all types of tokens. This + // may need to be factored out into a dedicated class + if (!empty($value->attr)) { + $stripped_token = $value->copy(); + $stripped_token->attr = array(); + $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); + } + $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; + } + continue; + } elseif (is_array($value)) { + $keys = array_keys($value); + if (array_keys($keys) === $keys) { + // list + $subst['$'.$i] = $this->listify($value); + } else { + // associative array + // no $i implementation yet, sorry + $subst['$'.$i.'.Keys'] = $this->listify($keys); + $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); + } + continue; + } + $subst['$' . 
$i] = $value; + } + return strtr($raw, $subst); + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php index 303ba4bae0..cbf0e612b6 100644 --- a/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php +++ b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php @@ -9,4 +9,3 @@ class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php index 115662bda9..3eac9ec65c 100644 --- a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php +++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php @@ -5,7 +5,6 @@ $fallback = 'en'; $messages = array( - 'htmlpurifier' => 'HTML Purifier X' + 'HTMLPurifier' => 'HTML Purifier X' ); -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php index 7650b81803..b16c3ff385 100644 --- a/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php +++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php @@ -4,9 +4,54 @@ $fallback = false; $messages = array( -'htmlpurifier' => 'HTML Purifier', -'pizza' => 'Pizza', // for unit testing purposes +'HTMLPurifier' => 'HTML Purifier', +// for unit testing purposes +'LanguageFactoryTest: Pizza' => 'Pizza', +'LanguageTest: List' => '$1', +'LanguageTest: Hash' => '$1.Keys; $1.Values', + +'Item separator' => ', ', +'Item separator last' => ' and ', // non-Harvard style + +'ErrorCollector: No errors' => 'No errors detected. 
However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', +'ErrorCollector: At line' => ' at line $line', + +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be &lt;', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Missing attribute key' => 'Attribute declaration has no key', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', + +'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', +'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', +'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', +'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', +'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', +'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed', +'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element were removed to end', + +'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', +'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', +'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to 
text', +'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', +'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', + +'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', +'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', +'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', +'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', + +'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys', +'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed', + +); + +$errorNames = array( + E_ERROR => 'Error', + E_WARNING => 'Warning', + E_NOTICE => 'Notice' ); -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php index 7097ced767..9d26cd7037 100644 --- a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php +++ b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php @@ -3,6 +3,14 @@ require_once 'HTMLPurifier/Language.php'; require_once 'HTMLPurifier/AttrDef/Lang.php'; +HTMLPurifier_ConfigSchema::define( + 'Core', 'Language', 'en', 'string', ' +ISO 639 language code for localizable things in HTML Purifier to use, +which is mainly error reporting. There is currently only an English (en) +translation, so this directive is currently useless. +This directive has been available since 2.0.0. +'); + /** * Class responsible for generating HTMLPurifier_Language objects, managing * caching and fallbacks. @@ -24,7 +32,7 @@ class HTMLPurifier_LanguageFactory * variables to slurp out of a message file. 
* @value array list */ - var $keys = array('fallback', 'messages'); + var $keys = array('fallback', 'messages', 'errorNames'); /** * Instance of HTMLPurifier_AttrDef_Lang to validate language codes @@ -43,7 +51,7 @@ class HTMLPurifier_LanguageFactory * Keys whose contents are a hash map and can be merged * @value array lookup */ - var $mergeable_keys_map = array('messages' => true); + var $mergeable_keys_map = array('messages' => true, 'errorNames' => true); /** * Keys whose contents are a list and can be merged @@ -74,17 +82,20 @@ class HTMLPurifier_LanguageFactory */ function setup() { $this->validator = new HTMLPurifier_AttrDef_Lang(); - $this->dir = dirname(__FILE__); + $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; } /** * Creates a language object, handles class fallbacks - * @param $code string language code + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context */ - function create($code) { + function create($config, &$context) { - $config = $context = false; // hope it doesn't use these! 
- $code = $this->validator->validate($code, $config, $context); + // validate language code + $code = $this->validator->validate( + $config->get('Core', 'Language'), $config, $context + ); if ($code === false) $code = 'en'; // malformed code becomes English $pcode = str_replace('-', '_', $code); // make valid PHP classname @@ -100,18 +111,18 @@ class HTMLPurifier_LanguageFactory // you can bypass the conditional include by loading the // file yourself if (file_exists($file) && !class_exists($class)) { - include_once $file; - } + include_once $file; + } } if (!class_exists($class)) { // go fallback - $fallback = HTMLPurifier_Language::getFallbackFor($code); + $fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code); $depth++; - $lang = Language::factory( $fallback ); + $lang = HTMLPurifier_LanguageFactory::factory( $fallback ); $depth--; } else { - $lang = new $class; + $lang = new $class($config, $context); } $lang->code = $code; @@ -172,15 +183,15 @@ class HTMLPurifier_LanguageFactory // merge fallback with current language foreach ( $this->keys as $key ) { - if (isset($cache[$key]) && isset($fallback_cache[$key])) { + if (isset($cache[$key]) && isset($fallback_cache[$key])) { if (isset($this->mergeable_keys_map[$key])) { $cache[$key] = $cache[$key] + $fallback_cache[$key]; } elseif (isset($this->mergeable_keys_list[$key])) { $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); } - } else { - $cache[$key] = $fallback_cache[$key]; - } + } else { + $cache[$key] = $fallback_cache[$key]; + } } } @@ -193,4 +204,3 @@ class HTMLPurifier_LanguageFactory } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer.php b/lib/htmlpurifier/HTMLPurifier/Lexer.php index e7242e1e36..b1bd1ed0e1 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php @@ -4,6 +4,14 @@ require_once 'HTMLPurifier/Token.php'; require_once 'HTMLPurifier/Encoder.php'; require_once 'HTMLPurifier/EntityParser.php'; +// 
implementations +require_once 'HTMLPurifier/Lexer/DirectLex.php'; +if (version_compare(PHP_VERSION, "5", ">=")) { + // You can remove the if statement if you are running PHP 5 only. + // We ought to get the strict version to follow those rules. + require_once 'HTMLPurifier/Lexer/DOMLex.php'; +} + HTMLPurifier_ConfigSchema::define( 'Core', 'AcceptFullDocuments', true, 'bool', 'This parameter determines whether or not the filter should accept full '. @@ -11,6 +19,63 @@ HTMLPurifier_ConfigSchema::define( 'drop all sections except the content between body.' ); +HTMLPurifier_ConfigSchema::define( + 'Core', 'LexerImpl', null, 'mixed/null', ' +

+ This parameter determines what lexer implementation can be used. The + valid values are: +

+
+
null
+
+ Recommended, the lexer implementation will be auto-detected based on + your PHP-version and configuration. +
+
string lexer identifier
+
+ This is a slim way of manually overriding the implementation. + Currently recognized values are: DOMLex (the default PHP5 implementation) + and DirectLex (the default PHP4 implementation). Only use this if + you know what you are doing: usually, the auto-detection will + manage things for cases you aren\'t even aware of. +
+
object lexer instance
+
+ Super-advanced: you can specify your own, custom, implementation that + implements the interface defined by HTMLPurifier_Lexer. + I may remove this option simply because I don\'t expect anyone + to use it. +
+
+

+ This directive has been available since 2.0.0. +

+' +); + +HTMLPurifier_ConfigSchema::define( + 'Core', 'MaintainLineNumbers', null, 'bool/null', ' +

+ If true, HTML Purifier will add line number information to all tokens. + This is useful when error reporting is turned on, but can result in + significant performance degradation and should not be used when + unnecessary. This directive must be used with the DirectLex lexer, + as the DOMLex lexer does not (yet) support this functionality. + If the value is null, an appropriate value will be selected based + on other configuration. This directive has been available since 2.0.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'Core', 'AggressivelyFixLt', false, 'bool', ' +This directive enables aggressive pre-filter fixes HTML Purifier can +perform in order to ensure that open angled-brackets do not get killed +during parsing stage. Enabling this will result in two preg_replace_callback +calls and one preg_replace call for every bit of HTML passed through here. +It is not necessary and will have no effect for PHP 4. +This directive has been available since 2.1.0. +'); + /** * Forgivingly lexes HTML (SGML-style) markup into tokens. * @@ -55,11 +120,87 @@ HTMLPurifier_ConfigSchema::define( class HTMLPurifier_Lexer { + // -- STATIC ---------------------------------------------------------- + + /** + * Retrieves or sets the default Lexer as a Prototype Factory. + * + * Depending on what PHP version you are running, the abstract base + * Lexer class will determine which concrete Lexer is best for you: + * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex + * for PHP 5 and beyond. This general rule has a few exceptions to it + * involving special features that only DirectLex implements. + * + * @static + * + * @note The behavior of this class has changed, rather than accepting + * a prototype object, it now accepts a configuration object. + * To specify your own prototype, set %Core.LexerImpl to it. + * This change in behavior de-singletonizes the lexer object. + * + * @note In PHP4, it is possible to call this factory method from + * subclasses, such usage is not recommended and not + * forwards-compatible. + * + * @param $prototype Optional prototype lexer or configuration object + * @return Concrete lexer. 
+ */ + function create($config) { + + if (!is_a($config, 'HTMLPurifier_Config')) { + $lexer = $config; + trigger_error("Passing a prototype to + HTMLPurifier_Lexer::create() is deprecated, please instead + use %Core.LexerImpl", E_USER_WARNING); + } else { + $lexer = $config->get('Core', 'LexerImpl'); + } + + if (is_object($lexer)) { + return $lexer; + } + + if (is_null($lexer)) { do { + // auto-detection algorithm + + // once PHP DOM implements native line numbers, or we + // hack out something using XSLT, remove this stipulation + $line_numbers = $config->get('Core', 'MaintainLineNumbers'); + if ( + $line_numbers === true || + ($line_numbers === null && $config->get('Core', 'CollectErrors')) + ) { + $lexer = 'DirectLex'; + break; + } + + if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5 + class_exists('DOMDocument')) { // check for DOM support + $lexer = 'DOMLex'; + } else { + $lexer = 'DirectLex'; + } + + } while(0); } // do..while so we can break + + // instantiate recognized string names + switch ($lexer) { + case 'DOMLex': + return new HTMLPurifier_Lexer_DOMLex(); + case 'DirectLex': + return new HTMLPurifier_Lexer_DirectLex(); + default: + trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR); + } + + } + + // -- CONVENIENCE MEMBERS --------------------------------------------- + function HTMLPurifier_Lexer() { $this->_entity_parser = new HTMLPurifier_EntityParser(); } - /** * Most common entity to raw value conversion table for special entities. * @protected @@ -123,46 +264,6 @@ class HTMLPurifier_Lexer trigger_error('Call to abstract class', E_USER_ERROR); } - /** - * Retrieves or sets the default Lexer as a Prototype Factory. - * - * Depending on what PHP version you are running, the abstract base - * Lexer class will determine which concrete Lexer is best for you: - * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex - * for PHP 5 and beyond. 
- * - * Passing the optional prototype lexer parameter will override the - * default with your own implementation. A copy/reference of the prototype - * lexer will now be returned when you request a new lexer. - * - * @static - * - * @note - * Though it is possible to call this factory method from subclasses, - * such usage is not recommended. - * - * @param $prototype Optional prototype lexer. - * @return Concrete lexer. - */ - function create($prototype = null) { - // we don't really care if it's a reference or a copy - static $lexer = null; - if ($prototype) { - $lexer = $prototype; - } - if (empty($lexer)) { - if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5 - class_exists('DOMDocument')) { // check for DOM support - require_once 'HTMLPurifier/Lexer/DOMLex.php'; - $lexer = new HTMLPurifier_Lexer_DOMLex(); - } else { - require_once 'HTMLPurifier/Lexer/DirectLex.php'; - $lexer = new HTMLPurifier_Lexer_DirectLex(); - } - } - return $lexer; - } - /** * Translates CDATA sections into regular sections (through escaping). 
* @@ -173,7 +274,18 @@ class HTMLPurifier_Lexer */ function escapeCDATA($string) { return preg_replace_callback( - '//', + '//s', + array('HTMLPurifier_Lexer', 'CDATACallback'), + $string + ); + } + + /** + * Special CDATA case that is especiall convoluted for )#si', + array('HTMLPurifier_Lexer_DirectLex', 'scriptCallback'), $html); + } + $html = $this->normalize($html, $config, $context); $cursor = 0; // our location in the text $inside_tag = false; // whether or not we're parsing the inside of a tag $array = array(); // result array + $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers'); + + if ($maintain_line_numbers === null) { + // automatically determine line numbering by checking + // if error collection is on + $maintain_line_numbers = $config->get('Core', 'CollectErrors'); + } + + if ($maintain_line_numbers) $current_line = 1; + else $current_line = false; + $context->register('CurrentLine', $current_line); + $nl = "\n"; + // how often to manually recalculate. This will ALWAYS be right, + // but it's pretty wasteful. Set to 0 to turn off + $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval'); + + $e = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + // infinite loop protection // has to be pretty big, since html docs can be big // we're allow two hundred thousand tags... more than enough? + // NOTE: this is also used for synchronization, so watch out $loops = 0; while(true) { @@ -42,10 +92,21 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // infinite loop protection if (++$loops > 200000) return array(); + // recalculate lines + if ( + $maintain_line_numbers && // line number tracking is on + $synchronize_interval && // synchronization is on + $cursor > 0 && // cursor is further than zero + $loops % $synchronize_interval === 0 // time to synchronize! 
+ ) { + $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); + } + $position_next_lt = strpos($html, '<', $cursor); $position_next_gt = strpos($html, '>', $cursor); // triggers on "asdf" but not "asdf " + // special case to set up context if ($position_next_lt === $cursor) { $inside_tag = true; $cursor++; @@ -53,7 +114,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if (!$inside_tag && $position_next_lt !== false) { // We are not inside tag and there still is another tag to parse - $array[] = new + $token = new HTMLPurifier_Token_Text( $this->parseData( substr( @@ -61,6 +122,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ) ) ); + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); + } + $array[] = $token; $cursor = $position_next_lt + 1; $inside_tag = true; continue; @@ -69,7 +135,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // If we're already at the end, break if ($cursor === strlen($html)) break; // Create Text of rest of string - $array[] = new + $token = new HTMLPurifier_Token_Text( $this->parseData( substr( @@ -77,26 +143,54 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ) ) ); + if ($maintain_line_numbers) $token->line = $current_line; + $array[] = $token; break; } elseif ($inside_tag && $position_next_gt !== false) { // We are in tag and it is well formed // Grab the internals of the tag $strlen_segment = $position_next_gt - $cursor; + + if ($strlen_segment < 1) { + // there's nothing to process! 
+ $token = new HTMLPurifier_Token_Text('<'); + $cursor++; + continue; + } + $segment = substr($html, $cursor, $strlen_segment); // Check if it's a comment if ( - substr($segment, 0, 3) == '!--' && - substr($segment, $strlen_segment-2, 2) == '--' + substr($segment, 0, 3) == '!--' ) { - $array[] = new + // re-determine segment length, looking for --> + $position_comment_end = strpos($html, '-->', $cursor); + if ($position_comment_end === false) { + // uh oh, we have a comment that extends to + // infinity. Can't be helped: set comment + // end position to end of string + if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); + $position_comment_end = strlen($html); + $end = true; + } else { + $end = false; + } + $strlen_segment = $position_comment_end - $cursor; + $segment = substr($html, $cursor, $strlen_segment); + $token = new HTMLPurifier_Token_Comment( substr( - $segment, 3, $strlen_segment - 5 + $segment, 3, $strlen_segment - 3 ) ); + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); + } + $array[] = $token; + $cursor = $end ? $position_comment_end : $position_comment_end + 3; $inside_tag = false; - $cursor = $position_next_gt + 1; continue; } @@ -104,7 +198,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $is_end_tag = (strpos($segment,'/') === 0); if ($is_end_tag) { $type = substr($segment, 1); - $array[] = new HTMLPurifier_Token_End($type); + $token = new HTMLPurifier_Token_End($type); + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; $inside_tag = false; $cursor = $position_next_gt + 1; continue; @@ -113,8 +212,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // Check leading character is alnum, if not, we may // have accidently grabbed an emoticon. 
Translate into // text and go our merry way - if (!ctype_alnum($segment[0])) { - $array[] = new + if (!ctype_alpha($segment[0])) { + // XML: $segment[0] !== '_' && $segment[0] !== ':' + if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + $token = new HTMLPurifier_Token_Text( '<' . $this->parseData( @@ -122,6 +223,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ) . '>' ); + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; $cursor = $position_next_gt + 1; $inside_tag = false; continue; @@ -142,10 +248,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($position_first_space >= $strlen_segment) { if ($is_self_closing) { - $array[] = new HTMLPurifier_Token_Empty($segment); + $token = new HTMLPurifier_Token_Empty($segment); } else { - $array[] = new HTMLPurifier_Token_Start($segment); + $token = new HTMLPurifier_Token_Start($segment); + } + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } + $array[] = $token; $inside_tag = false; $cursor = $position_next_gt + 1; continue; @@ -169,28 +280,56 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } if ($is_self_closing) { - $array[] = new HTMLPurifier_Token_Empty($type, $attr); + $token = new HTMLPurifier_Token_Empty($type, $attr); } else { - $array[] = new HTMLPurifier_Token_Start($type, $attr); + $token = new HTMLPurifier_Token_Start($type, $attr); } + if ($maintain_line_numbers) { + $token->line = $current_line; + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; $cursor = $position_next_gt + 1; $inside_tag = false; continue; } else { - $array[] = new + // inside tag, but there's no ending > sign + if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); + $token = new 
HTMLPurifier_Token_Text( '<' . $this->parseData( substr($html, $cursor) ) ); + if ($maintain_line_numbers) $token->line = $current_line; + // no cursor scroll? Hmm... + $array[] = $token; break; } break; } + + $context->destroy('CurrentLine'); return $array; } + /** + * PHP 4 compatible substr_count that implements offset and length + */ + function substrCount($haystack, $needle, $offset, $length) { + static $oldVersion; + if ($oldVersion === null) { + $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); + } + if ($oldVersion) { + $haystack = substr($haystack, $offset, $length); + return substr_count($haystack, $needle); + } else { + return substr_count($haystack, $needle, $offset, $length); + } + } + /** * Takes the inside of an HTML tag and makes an assoc array of attributes. * @@ -202,6 +341,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($string == '') return array(); // no attributes + $e = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + // let's see if we can abort as quickly as possible // one equal sign, no spaces => one attribute $num_equal = substr_count($string, '='); @@ -213,7 +357,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // only one attribute list($key, $quoted_value) = explode('=', $string); $quoted_value = trim($quoted_value); - if (!$key) return array(); + if (!$key) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + return array(); + } if (!$quoted_value) return array($key => ''); $first_char = @$quoted_value[0]; $last_char = @$quoted_value[strlen($quoted_value)-1]; @@ -227,11 +374,13 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } else { // not well behaved if ($open_quote) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); $value = substr($quoted_value, 1); } else { $value = $quoted_value; } } + if ($value === false) $value = ''; return array($key => $value); } @@ -246,18 +395,19 @@ class 
HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // infinite loop protection $loops = 0; - while(true) { // infinite loop protection - if (++$loops > 1000) return array(); + if (++$loops > 1000) { + trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING); + return array(); + } if ($cursor >= $size) { break; } $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); - // grab the key $key_begin = $cursor; //we're currently at the start of the key @@ -269,7 +419,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $key = substr($string, $key_begin, $key_end - $key_begin); - if (!$key) continue; // empty key + if (!$key) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + continue; // empty key + } // scroll past all whitespace $cursor += strspn($string, $this->_whitespace, $cursor); @@ -289,6 +443,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $cursor++; $cursor += strspn($string, $this->_whitespace, $cursor); + if ($cursor === false) { + $array[$key] = ''; + break; + } + // we might be in front of a quote right now $char = @$string[$cursor]; @@ -306,7 +465,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $value_end = $cursor; } + // we reached a premature end + if ($cursor === false) { + $cursor = $size; + $value_end = $cursor; + } + $value = substr($string, $value_begin, $value_end - $value_begin); + if ($value === false) $value = ''; $array[$key] = $this->parseData($value); $cursor++; @@ -314,6 +480,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // boolattr if ($key !== '') { $array[$key] = $key; + } else { + // purely theoretical + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); } } @@ -323,4 +492,3 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } -?> \ No newline at end of file diff --git 
a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php index 18777ef7e8..3888229b07 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php @@ -107,4 +107,3 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php b/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php index 7a12caaa76..e32421e1c1 100644 --- a/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php +++ b/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php @@ -44,4 +44,3 @@ class HTMLPurifier_PercentEncoder } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer.php b/lib/htmlpurifier/HTMLPurifier/Printer.php index 14135fd8db..7e20daafe3 100644 --- a/lib/htmlpurifier/HTMLPurifier/Printer.php +++ b/lib/htmlpurifier/HTMLPurifier/Printer.php @@ -4,6 +4,8 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Token.php'; require_once 'HTMLPurifier/Encoder.php'; +// OUT OF DATE, NEEDS UPDATING! 
+ class HTMLPurifier_Printer { @@ -24,11 +26,21 @@ class HTMLPurifier_Printer $this->generator = new HTMLPurifier_Generator(); } + /** + * Give generator necessary configuration if possible + */ + function prepareGenerator($config) { + // hack for smoketests/configForm.php + if (empty($config->conf['HTML'])) return; + $context = new HTMLPurifier_Context(); + $this->generator->generateFromTokens(array(), $config, $context); + } + /** * Main function that renders object or aspect of that object - * @param $config Configuration object + * @note Parameters vary depending on printer */ - function render($config) {} + // function render() {} /** * Returns a start tag @@ -64,6 +76,18 @@ class HTMLPurifier_Printer $this->end($tag); } + function elementEmpty($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Empty($tag, $attr) + ); + } + + function text($text) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Text($text) + ); + } + /** * Prints a simple key/value row in a table. 
* @param $name Key @@ -146,4 +170,3 @@ class HTMLPurifier_Printer } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php index 7745f5f444..7d3ad61e98 100644 --- a/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php @@ -37,4 +37,3 @@ class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.css b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.css new file mode 100644 index 0000000000..0653bbb0c0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.css @@ -0,0 +1,8 @@ + +.hp-config {} + +.hp-config tbody th {text-align:right; padding-right:0.5em;} +.hp-config thead, .hp-config .namespace {background:#3C578C; color:#FFF;} +.hp-config .namespace th {text-align:center;} +.hp-config .verbose {display:none;} +.hp-config .controls {text-align:center;} diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.js b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.js new file mode 100644 index 0000000000..119ca4a04d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.js @@ -0,0 +1,3 @@ +function toggleWriteability(id_of_patient, checked) { + document.getElementById(id_of_patient).disabled = checked; +} \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.php b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.php new file mode 100644 index 0000000000..31da35f8ac --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Printer/ConfigForm.php @@ -0,0 +1,320 @@ +docURL = $doc_url; + $this->name = $name; + $this->compress = $compress; + $this->fields['default'] = new HTMLPurifier_Printer_ConfigForm_default(); + $this->fields['bool'] = new HTMLPurifier_Printer_ConfigForm_bool(); + } + + /** + * @param $cols Integer columns of textarea, 
null to use default + * @param $rows Integer rows of textarea, null to use default + */ + function setTextareaDimensions($cols = null, $rows = null) { + if ($cols) $this->fields['default']->cols = $cols; + if ($rows) $this->fields['default']->rows = $rows; + } + + /** + * Retrieves styling, in case the directory it's in is not publically + * available + */ + function getCSS() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css'); + } + + /** + * Retrieves JavaScript, in case directory is not public + */ + function getJavaScript() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js'); + } + + /** + * Returns HTML output for a configuration form + * @param $config Configuration object of current form state + * @param $allowed Optional namespace(s) and directives to restrict form to. + */ + function render($config, $allowed = true, $render_controls = true) { + $this->config = $config; + $this->prepareGenerator($config); + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed); + $all = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $all[$ns][$directive] = $config->get($ns, $directive); + } + + $ret = ''; + $ret .= $this->start('table', array('class' => 'hp-config')); + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Directive'); + $ret .= $this->element('th', 'Value'); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + foreach ($all as $ns => $directives) { + $ret .= $this->renderNamespace($ns, $directives); + } + if ($render_controls) { + $ret .= $this->start('tfoot'); + $ret .= $this->start('tr'); + $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls')); + $ret .= $this->elementEmpty('input', array('type' => 'Submit', 'value' => 'Submit')); + $ret .= '[Reset]'; + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + $ret .= $this->end('tfoot'); + } + $ret .= 
$this->end('table'); + return $ret; + } + + /** + * Renders a single namespace + * @param $ns String namespace name + * @param $directive Associative array of directives to values + * @protected + */ + function renderNamespace($ns, $directives) { + $ret = ''; + $ret .= $this->start('tbody', array('class' => 'namespace')); + $ret .= $this->start('tr'); + $ret .= $this->element('th', $ns, array('colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); + $ret .= $this->start('tbody'); + foreach ($directives as $directive => $value) { + $ret .= $this->start('tr'); + $ret .= $this->start('th'); + if ($this->docURL) { + $url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL); + $ret .= $this->start('a', array('href' => $url)); + } + $attr = array('for' => "{$this->name}:$ns.$directive"); + + // crop directive name if it's too long + if (!$this->compress || (strlen($directive) < $this->compress)) { + $directive_disp = $directive; + } else { + $directive_disp = substr($directive, 0, $this->compress - 2) . 
'...'; + $attr['title'] = $directive; + } + + $ret .= $this->element( + 'label', + $directive_disp, + // component printers must create an element with this id + $attr + ); + if ($this->docURL) $ret .= $this->end('a'); + $ret .= $this->end('th'); + + $ret .= $this->start('td'); + $def = $this->config->def->info[$ns][$directive]; + $type = $def->type; + if (!isset($this->fields[$type])) $type = 'default'; + $type_obj = $this->fields[$type]; + if ($def->allow_null) { + $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj); + } + $ret .= $type_obj->render($ns, $directive, $value, $this->name, $this->config); + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + } + $ret .= $this->end('tbody'); + return $ret; + } + +} + +/** + * Printer decorator for directives that accept null + */ +class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer { + /** + * Printer being decorated + */ + var $obj; + /** + * @param $obj Printer to decorate + */ + function HTMLPurifier_Printer_ConfigForm_NullDecorator($obj) { + parent::HTMLPurifier_Printer(); + $this->obj = $obj; + } + function render($ns, $directive, $value, $name, $config) { + $this->prepareGenerator($config); + $ret = ''; + $ret .= $this->start('label', array('for' => "$name:Null_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' Null/Disabled'); + $ret .= $this->end('label'); + $attr = array( + 'type' => 'checkbox', + 'value' => '1', + 'class' => 'null-toggle', + 'name' => "$name"."[Null_$ns.$directive]", + 'id' => "$name:Null_$ns.$directive", + 'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!! 
+ ); + if ($value === null) $attr['checked'] = 'checked'; + $ret .= $this->elementEmpty('input', $attr); + $ret .= $this->text(' or '); + $ret .= $this->elementEmpty('br'); + $ret .= $this->obj->render($ns, $directive, $value, $name, $config); + return $ret; + } +} + +/** + * Swiss-army knife configuration form field printer + */ +class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { + var $cols = 18; + var $rows = 5; + function render($ns, $directive, $value, $name, $config) { + $this->prepareGenerator($config); + // this should probably be split up a little + $ret = ''; + $def = $config->def->info[$ns][$directive]; + if (is_array($value)) { + switch ($def->type) { + case 'lookup': + $array = $value; + $value = array(); + foreach ($array as $val => $b) { + $value[] = $val; + } + case 'list': + $value = implode(PHP_EOL, $value); + break; + case 'hash': + $nvalue = ''; + foreach ($value as $i => $v) { + $nvalue .= "$i:$v" . PHP_EOL; + } + $value = $nvalue; + break; + default: + $value = ''; + } + } + if ($def->type === 'mixed') { + return 'Not supported'; + $value = serialize($value); + } + $attr = array( + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:$ns.$directive" + ); + if ($value === null) $attr['disabled'] = 'disabled'; + if (is_array($def->allowed)) { + $ret .= $this->start('select', $attr); + foreach ($def->allowed as $val => $b) { + $attr = array(); + if ($value == $val) $attr['selected'] = 'selected'; + $ret .= $this->element('option', $val, $attr); + } + $ret .= $this->end('select'); + } elseif ( + $def->type == 'text' || $def->type == 'itext' || + $def->type == 'list' || $def->type == 'hash' || $def->type == 'lookup' + ) { + $attr['cols'] = $this->cols; + $attr['rows'] = $this->rows; + $ret .= $this->start('textarea', $attr); + $ret .= $this->text($value); + $ret .= $this->end('textarea'); + } else { + $attr['value'] = $value; + $attr['type'] = 'text'; + $ret .= $this->elementEmpty('input', $attr); + } + return $ret; + } 
+} + +/** + * Bool form field printer + */ +class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer { + function render($ns, $directive, $value, $name, $config) { + $this->prepareGenerator($config); + $ret = ''; + $ret .= $this->start('div', array('id' => "$name:$ns.$directive")); + + $ret .= $this->start('label', array('for' => "$name:Yes_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' Yes'); + $ret .= $this->end('label'); + + $attr = array( + 'type' => 'radio', + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:Yes_$ns.$directive", + 'value' => '1' + ); + if ($value) $attr['checked'] = 'checked'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->start('label', array('for' => "$name:No_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' No'); + $ret .= $this->end('label'); + + $attr = array( + 'type' => 'radio', + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:No_$ns.$directive", + 'value' => '0' + ); + if (!$value) $attr['checked'] = 'checked'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->end('div'); + + return $ret; + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php index a677c58bf6..52650c6308 100644 --- a/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php @@ -15,9 +15,44 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $this->config =& $config; $this->def = $config->getHTMLDefinition(); - $def =& $this->def; $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + + $ret .= $this->renderDoctype(); + $ret .= $this->renderEnvironment(); + $ret .= $this->renderContentSets(); + $ret .= $this->renderInfo(); + + $ret .= $this->end('div'); + + return $ret; + } 
+ + /** + * Renders the Doctype table + */ + function renderDoctype() { + $doctype = $this->def->doctype; + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Doctype'); + $ret .= $this->row('Name', $doctype->name); + $ret .= $this->row('XML', $doctype->xml ? 'Yes' : 'No'); + $ret .= $this->row('Default Modules', implode($doctype->modules, ', ')); + $ret .= $this->row('Default Tidy Modules', implode($doctype->tidyModules, ', ')); + $ret .= $this->end('table'); + return $ret; + } + + + /** + * Renders environment table, which is miscellaneous info + */ + function renderEnvironment() { + $def = $this->def; + + $ret = ''; + $ret .= $this->start('table'); $ret .= $this->element('caption', 'Environment'); @@ -51,13 +86,22 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->end('tr'); $ret .= $this->end('table'); - - - $ret .= $this->renderInfo(); - - - $ret .= $this->end('div'); - + return $ret; + } + + /** + * Renders the Content Sets table + */ + function renderContentSets() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Content Sets'); + foreach ($this->def->info_content_sets as $name => $lookup) { + $ret .= $this->heavyHeader($name); + $ret .= $this->start('tr'); + $ret .= $this->element('td', $this->listifyTagLookup($lookup)); + $ret .= $this->end('tr'); + } return $ret; } @@ -69,15 +113,13 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->start('table'); $ret .= $this->element('caption', 'Elements ($info)'); ksort($this->def->info); - $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy')); - $ret .= $this->end('tr'); + $ret .= $this->heavyHeader('Allowed tags', 2); $ret .= $this->start('tr'); $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2)); $ret .= $this->end('tr'); foreach ($this->def->info as $name => $def) { 
$ret .= $this->start('tr'); - $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->element('th', "<$name>" . ($def->safe ? '' : ' (unsafe)'), array('class'=>'heavy' . ($def->safe ? '' : ' unsafe'), 'colspan' => 2)); $ret .= $this->end('tr'); $ret .= $this->start('tr'); $ret .= $this->element('th', 'Inline content'); @@ -109,9 +151,13 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer } $ret .= $this->start('tr'); $ret .= $this->element('th', 'Allowed attributes'); - $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0); + $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0); $ret .= $this->end('tr'); + if (!empty($def->required_attr)) { + $ret .= $this->row('Required attributes', $this->listify($def->required_attr)); + } + $ret .= $this->renderChildren($def->child); } $ret .= $this->end('table'); @@ -154,6 +200,11 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer 'Inline: ' . $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + } elseif ($def->type == 'custom') { + + $ret .= $this->element('td', ''.ucfirst($def->type).': ' . + $def->dtd_regex); + } else { $ret .= $this->element('td', ''.ucfirst($def->type).': ' . 
@@ -205,6 +256,16 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer return $this->listify($list); } + /** + * Creates a heavy header row + */ + function heavyHeader($text, $num = 1) { + $ret = ''; + $ret .= $this->start('tr'); + $ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy')); + $ret .= $this->end('tr'); + return $ret; + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy.php b/lib/htmlpurifier/HTMLPurifier/Strategy.php index 746b0a2d6e..a6ab7e8bca 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy.php @@ -30,4 +30,3 @@ class HTMLPurifier_Strategy } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php b/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php index bd86874798..fcd230f472 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php @@ -27,4 +27,3 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php b/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php index 66e7bb3634..93d051046a 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php @@ -22,4 +22,3 @@ class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php index 08f907562f..51a14a78f4 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php @@ -42,16 +42,21 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $definition = $config->getHTMLDefinition(); // insert implicit "parent" node, will be removed at end. - // ! 
we might want to move this to configuration // DEFINITION CALL $parent_name = $definition->info_parent; array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); $tokens[] = new HTMLPurifier_Token_End($parent_name); - // setup the context variables - $is_inline = false; // reference var that we alter + // setup the context variable 'IsInline', for chameleon processing + // is 'false' when we are not inline, 'true' when it must always + // be inline, and an integer when it is inline for a certain + // branch of the document tree + $is_inline = $definition->info_parent_def->descendants_are_inline; $context->register('IsInline', $is_inline); + // setup error collector + $e =& $context->get('ErrorCollector', true); + //####################################################################// // Loop initialization @@ -60,10 +65,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $stack = array(); // stack that contains all elements that are excluded - // same structure as $stack, but it is only populated when an element - // with exclusions is processed, i.e. there won't be empty exclusions. + // it is organized by parent elements, similar to $stack, + // but it is only populated when an element with exclusions is + // processed, i.e. there won't be empty exclusions. $exclude_stack = array(); + // variable that contains the start token while we are processing + // nodes. 
This enables error reporting to do its job + $start_token = false; + $context->register('CurrentToken', $start_token); + //####################################################################// // Loop @@ -97,6 +108,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // $i is index of start token // $j is index of end token + $start_token = $tokens[$i]; // to make token available via CurrentToken + //################################################################// // Gather information on parent @@ -110,7 +123,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $parent_def = $definition->info[$parent_name]; } } else { - // unknown info, it won't be used anyway + // processing as if the parent were the "root" node + // unknown info, it won't be used anyway, in the future, + // we may want to enforce one element only (this is + // necessary for HTML Purifier to clean entire documents $parent_index = $parent_name = $parent_def = null; } @@ -194,6 +210,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy } elseif($result === false) { // remove entire node + if ($e) { + if ($excluded) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); + } else { + $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); + } + } + // calculate length of inner tokens and current tokens $length = $j - $i + 1; @@ -207,6 +231,12 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // current node is now the next possible start node // unless it turns out that we need to do a double-check + // this is a rought heuristic that covers 100% of HTML's + // cases and 99% of all other cases. A child definition + // that would be tricked by this would be something like: + // ( | a b c) where it's all or nothing. 
Fortunately, + // our current implementation claims that that case would + // not allow empty, even if it did if (!$parent_def->child->allow_empty) { // we need to do a double-check $i = $parent_index; @@ -222,6 +252,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // calculate length of inner tokens $length = $j - $i - 1; + if ($e) { + if (empty($result) && $length) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); + } else { + $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); + } + } + // perform replacement array_splice($tokens, $i + 1, $length, $result); @@ -279,6 +317,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // remove context variables $context->destroy('IsInline'); + $context->destroy('CurrentToken'); //####################################################################// // Return @@ -289,4 +328,4 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy } -?> + diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php index 84580d3d34..b3e8aa7453 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php @@ -4,127 +4,234 @@ require_once 'HTMLPurifier/Strategy.php'; require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/Generator.php'; +require_once 'HTMLPurifier/Injector/AutoParagraph.php'; +require_once 'HTMLPurifier/Injector/Linkify.php'; +require_once 'HTMLPurifier/Injector/PurifierLinkify.php'; + +HTMLPurifier_ConfigSchema::define( + 'AutoFormat', 'Custom', array(), 'list', ' +

+ This directive can be used to add custom auto-format injectors. + Specify an array of injector names (class name minus the prefix) + or concrete implementations. Injector class must exist. This directive + has been available since 2.0.1. +

+' +); + /** * Takes tokens makes them well-formed (balance end tags, etc.) */ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy { + /** + * Locally shared variable references + * @private + */ + var $inputTokens, $inputIndex, $outputTokens, $currentNesting, + $currentInjector, $injectors; + function execute($tokens, $config, &$context) { + $definition = $config->getHTMLDefinition(); - $generator = new HTMLPurifier_Generator(); + + // CurrentNesting + $this->currentNesting = array(); + $context->register('CurrentNesting', $this->currentNesting); + + // InputIndex + $this->inputIndex = false; + $context->register('InputIndex', $this->inputIndex); + + // InputTokens + $context->register('InputTokens', $tokens); + $this->inputTokens =& $tokens; + + // OutputTokens $result = array(); - $current_nesting = array(); + $this->outputTokens =& $result; + + // %Core.EscapeInvalidTags $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); - foreach ($tokens as $token) { - if (empty( $token->is_tag )) { - $result[] = $token; - continue; + $generator = new HTMLPurifier_Generator(); + + $e =& $context->get('ErrorCollector', true); + + // -- begin INJECTOR -- + + $this->injectors = array(); + + $injectors = $config->getBatch('AutoFormat'); + $custom_injectors = $injectors['Custom']; + unset($injectors['Custom']); // special case + foreach ($injectors as $injector => $b) { + $injector = "HTMLPurifier_Injector_$injector"; + if (!$b) continue; + $this->injectors[] = new $injector; + } + foreach ($custom_injectors as $injector) { + if (is_string($injector)) { + $injector = "HTMLPurifier_Injector_$injector"; + $injector = new $injector; } + $this->injectors[] = $injector; + } + + // array index of the injector that resulted in an array + // substitution. 
This enables processTokens() to know which + // injectors are affected by the added tokens and which are + // not (namely, the ones after the current injector are not + // affected) + $this->currentInjector = false; + + // give the injectors references to the definition and context + // variables for performance reasons + foreach ($this->injectors as $i => $x) { + $error = $this->injectors[$i]->prepare($config, $context); + if (!$error) continue; + list($injector) = array_splice($this->injectors, $i, 1); + $name = $injector->name; + trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING); + } + + // -- end INJECTOR -- + + $token = false; + $context->register('CurrentToken', $token); + + for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) { - // DEFINITION CALL - $info = $definition->info[$token->name]->child; + // if all goes well, this token will be passed through unharmed + $token = $tokens[$this->inputIndex]; - // test if it claims to be a start tag but is empty - if ($info->type == 'empty' && - $token->type == 'start' ) { - - $result[] = new HTMLPurifier_Token_Empty($token->name, - $token->attr); - continue; + foreach ($this->injectors as $i => $x) { + if ($x->skip > 0) $this->injectors[$i]->skip--; } - // test if it claims to be empty but really is a start tag - if ($info->type != 'empty' && - $token->type == 'empty' ) { - - $result[] = new HTMLPurifier_Token_Start($token->name, - $token->attr); - $result[] = new HTMLPurifier_Token_End($token->name); - + // quick-check: if it's not a tag, no need to process + if (empty( $token->is_tag )) { + if ($token->type === 'text') { + // injector handler code; duplicated for performance reasons + foreach ($this->injectors as $i => $x) { + if (!$x->skip) $x->handleText($token); + if (is_array($token)) { + $this->currentInjector = $i; + break; + } + } + } + $this->processToken($token, $config, $context); continue; } - // automatically insert empty tags - 
if ($token->type == 'empty') { - $result[] = $token; - continue; - } + $info = $definition->info[$token->name]->child; - // we give start tags precedence, so automatically accept unless... - // it's one of those special cases - if ($token->type == 'start') { + // quick tag checks: anything that's *not* an end tag + $ok = false; + if ($info->type == 'empty' && $token->type == 'start') { + // test if it claims to be a start tag but is empty + $token = new HTMLPurifier_Token_Empty($token->name, $token->attr); + $ok = true; + } elseif ($info->type != 'empty' && $token->type == 'empty' ) { + // claims to be empty but really is a start tag + $token = array( + new HTMLPurifier_Token_Start($token->name, $token->attr), + new HTMLPurifier_Token_End($token->name) + ); + $ok = true; + } elseif ($token->type == 'empty') { + // real empty token + $ok = true; + } elseif ($token->type == 'start') { + // start tag - // if there's a parent, check for special case - if (!empty($current_nesting)) { + // ...unless they also have to close their parent + if (!empty($this->currentNesting)) { - $parent = array_pop($current_nesting); - $parent_name = $parent->name; - $parent_info = $definition->info[$parent_name]; + $parent = array_pop($this->currentNesting); + $parent_info = $definition->info[$parent->name]; - if (isset($parent_info->auto_close[$token->name])) { - $result[] = new HTMLPurifier_Token_End($parent_name); + // this can be replaced with a more general algorithm: + // if the token is not allowed by the parent, auto-close + // the parent + if (!isset($parent_info->child->elements[$token->name])) { + if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); + // close the parent, then append the token + $result[] = new HTMLPurifier_Token_End($parent->name); $result[] = $token; - $current_nesting[] = $token; + $this->currentNesting[] = $token; continue; } - $current_nesting[] = $parent; // undo the pop + $this->currentNesting[] = $parent; // undo the pop } - - 
$result[] = $token; - $current_nesting[] = $token; + $ok = true; + } + + // injector handler code; duplicated for performance reasons + if ($ok) { + foreach ($this->injectors as $i => $x) { + if (!$x->skip) $x->handleElement($token); + if (is_array($token)) { + $this->currentInjector = $i; + break; + } + } + $this->processToken($token, $config, $context); continue; } - // sanity check + // sanity check: we should be dealing with a closing tag if ($token->type != 'end') continue; - // okay, we're dealing with a closing tag - // make sure that we have something open - if (empty($current_nesting)) { + if (empty($this->currentNesting)) { if ($escape_invalid_tags) { + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); $result[] = new HTMLPurifier_Token_Text( $generator->generateFromToken($token, $config, $context) ); + } elseif ($e) { + $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); } continue; } // first, check for the simplest case: everything closes neatly - - // current_nesting is modified - $current_parent = array_pop($current_nesting); + $current_parent = array_pop($this->currentNesting); if ($current_parent->name == $token->name) { $result[] = $token; continue; } - // undo the array_pop - $current_nesting[] = $current_parent; - // okay, so we're trying to close the wrong tag - // scroll back the entire nest, trying to find our tag - // feature could be to specify how far you'd like to go - $size = count($current_nesting); + // undo the pop previous pop + $this->currentNesting[] = $current_parent; + + // scroll back the entire nest, trying to find our tag. 
+ // (feature could be to specify how far you'd like to go) + $size = count($this->currentNesting); // -2 because -1 is the last element, but we already checked that $skipped_tags = false; for ($i = $size - 2; $i >= 0; $i--) { - if ($current_nesting[$i]->name == $token->name) { + if ($this->currentNesting[$i]->name == $token->name) { // current nesting is modified - $skipped_tags = array_splice($current_nesting, $i); + $skipped_tags = array_splice($this->currentNesting, $i); break; } } - // we still didn't find the tag, so translate to text + // we still didn't find the tag, so remove if ($skipped_tags === false) { if ($escape_invalid_tags) { $result[] = new HTMLPurifier_Token_Text( $generator->generateFromToken($token, $config, $context) ); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); + } elseif ($e) { + $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); } continue; } @@ -132,27 +239,68 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // okay, we found it, close all the skipped tags // note that skipped tags contains the element we need closed $size = count($skipped_tags); - for ($i = $size - 1; $i >= 0; $i--) { + for ($i = $size - 1; $i > 0; $i--) { + if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); + } $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); } - // done! 
+ $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); } - // we're at the end now, fix all still unclosed tags + $context->destroy('CurrentNesting'); + $context->destroy('InputTokens'); + $context->destroy('InputIndex'); + $context->destroy('CurrentToken'); - if (!empty($current_nesting)) { - $size = count($current_nesting); + // we're at the end now, fix all still unclosed tags + // not using processToken() because at this point we don't + // care about current nesting + if (!empty($this->currentNesting)) { + $size = count($this->currentNesting); for ($i = $size - 1; $i >= 0; $i--) { + if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); + } $result[] = - new HTMLPurifier_Token_End($current_nesting[$i]->name); + new HTMLPurifier_Token_End($this->currentNesting[$i]->name); } } + unset($this->outputTokens, $this->injectors, $this->currentInjector, + $this->currentNesting, $this->inputTokens, $this->inputIndex); + return $result; } + function processToken($token, $config, &$context) { + if (is_array($token)) { + // the original token was overloaded by an injector, time + // to some fancy acrobatics + + // $this->inputIndex is decremented so that the entire set gets + // re-processed + array_splice($this->inputTokens, $this->inputIndex--, 1, $token); + + // adjust the injector skips based on the array substitution + if ($this->injectors) { + $offset = count($token) + 1; + for ($i = 0; $i <= $this->currentInjector; $i++) { + $this->injectors[$i]->skip += $offset; + } + } + } elseif ($token) { + // regular case + $this->outputTokens[] = $token; + if ($token->type == 'start') { + $this->currentNesting[] = $token; + } elseif ($token->type == 'end') { + array_pop($this->currentNesting); // not actually used + } + } + } + } -?> \ No newline at end of file diff --git 
a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php index cb5c4dd1b3..2c280b23d7 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -5,12 +5,43 @@ require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/TagTransform.php'; +require_once 'HTMLPurifier/AttrValidator.php'; + +HTMLPurifier_ConfigSchema::define( + 'Core', 'RemoveInvalidImg', true, 'bool', ' +

+ This directive enables pre-emptive URI checking in img + tags, as the attribute validation strategy is not authorized to + remove elements from the document. This directive has been available + since 1.3.0, revert to pre-1.3.0 behavior by setting to false. +

+' +); + +HTMLPurifier_ConfigSchema::define( + 'Core', 'RemoveScriptContents', null, 'bool/null', ' +

+ This directive enables HTML Purifier to remove not only script tags + but all of their contents. This directive has been deprecated since 2.1.0, + and when not set the value of %Core.HiddenElements will take + precedence. This directive has been available since 2.0.0, and can be used to + revert to pre-2.0.0 behavior by setting it to false. +

+' +); + HTMLPurifier_ConfigSchema::define( - 'Core', 'RemoveInvalidImg', true, 'bool', - 'This directive enables pre-emptive URI checking in img '. - 'tags, as the attribute validation strategy is not authorized to '. - 'remove elements from the document. This directive has been available '. - 'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.' + 'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', ' +

+ This directive is a lookup array of elements which should have their + contents removed when they are not allowed by the HTML definition. + For example, the contents of a script tag are not + normally shown in a document, so if script tags are to be removed, + their contents should be removed to. This is opposed to a b + tag, which defines some presentational changes but does not hide its + contents. +

+' ); /** @@ -28,60 +59,136 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $definition = $config->getHTMLDefinition(); $generator = new HTMLPurifier_Generator(); $result = array(); + $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg'); + + $remove_script_contents = $config->get('Core', 'RemoveScriptContents'); + $hidden_elements = $config->get('Core', 'HiddenElements'); + + // remove script contents compatibility + if ($remove_script_contents === true) { + $hidden_elements['script'] = true; + } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) { + unset($hidden_elements['script']); + } + + $attr_validator = new HTMLPurifier_AttrValidator(); + + // removes tokens until it reaches a closing tag with its value + $remove_until = false; + + // converts comments into text tokens when this is equal to a tag name + $textify_comments = false; + + $token = false; + $context->register('CurrentToken', $token); + + $e = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + foreach($tokens as $token) { + if ($remove_until) { + if (empty($token->is_tag) || $token->name !== $remove_until) { + continue; + } + } if (!empty( $token->is_tag )) { // DEFINITION CALL - if (isset($definition->info[$token->name])) { - // leave untouched, except for a few special cases: - - // hard-coded image special case, pre-emptively drop - // if not available. 
Probably not abstract-able - if ( $token->name == 'img' && $remove_invalid_img ) { - if (!isset($token->attr['src'])) { - continue; - } - if (!isset($definition->info['img']->attr['src'])) { - continue; - } - $token->attr['src'] = - $definition-> - info['img']-> - attr['src']-> - validate($token->attr['src'], - $config, $context); - if ($token->attr['src'] === false) continue; - } - - } elseif ( + + // before any processing, try to transform the element + if ( isset($definition->info_tag_transform[$token->name]) ) { + $original_name = $token->name; // there is a transformation for this tag // DEFINITION CALL $token = $definition-> info_tag_transform[$token->name]-> transform($token, $config, $context); + if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name); + } + + if (isset($definition->info[$token->name])) { + + // mostly everything's good, but + // we need to make sure required attributes are in order + if ( + $definition->info[$token->name]->required_attr && + ($token->name != 'img' || $remove_invalid_img) // ensure config option still works + ) { + $attr_validator->validateToken($token, $config, $context); + $ok = true; + foreach ($definition->info[$token->name]->required_attr as $name) { + if (!isset($token->attr[$name])) { + $ok = false; + break; + } + } + if (!$ok) { + if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name); + continue; + } + $token->armor['ValidateAttributes'] = true; + } + + // CAN BE GENERICIZED + if (isset($hidden_elements[$token->name]) && $token->type == 'start') { + $textify_comments = $token->name; + } elseif ($token->name === $textify_comments && $token->type == 'end') { + $textify_comments = false; + } + } elseif ($escape_invalid_tags) { - // invalid tag, generate HTML and insert in + // invalid tag, generate HTML representation and insert in + if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text'); $token = new 
HTMLPurifier_Token_Text( $generator->generateFromToken($token, $config, $context) ); } else { + // check if we need to destroy all of the tag's children + // CAN BE GENERICIZED + if (isset($hidden_elements[$token->name])) { + if ($token->type == 'start') { + $remove_until = $token->name; + } elseif ($token->type == 'empty') { + // do nothing: we're still looking + } else { + $remove_until = false; + } + if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'); + } else { + if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed'); + } continue; } } elseif ($token->type == 'comment') { - // strip comments - continue; + // textify comments in script tags when they are allowed + if ($textify_comments !== false) { + $data = $token->data; + $token = new HTMLPurifier_Token_Text($data); + } else { + // strip comments + if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); + continue; + } } elseif ($token->type == 'text') { } else { continue; } $result[] = $token; } + if ($remove_until && $e) { + // we removed tokens until the end, throw error + $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until); + } + + $context->destroy('CurrentToken'); + return $result; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php index 07744f803d..869f3fab93 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php @@ -4,6 +4,8 @@ require_once 'HTMLPurifier/Strategy.php'; require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/IDAccumulator.php'; +require_once 'HTMLPurifier/AttrValidator.php'; + HTMLPurifier_ConfigSchema::define( 'Attr', 'IDBlacklist', array(), 'list', 'Array of IDs not allowed in the document.'); @@ -17,16 +19,16 @@ class 
HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy function execute($tokens, $config, &$context) { - $definition = $config->getHTMLDefinition(); - // setup id_accumulator context $id_accumulator = new HTMLPurifier_IDAccumulator(); $id_accumulator->load($config->get('Attr', 'IDBlacklist')); $context->register('IDAccumulator', $id_accumulator); - // create alias to global definition array, see also $defs - // DEFINITION CALL - $d_defs = $definition->info_global_attr; + // setup validator + $validator = new HTMLPurifier_AttrValidator(); + + $token = false; + $context->register('CurrentToken', $token); foreach ($tokens as $key => $token) { @@ -34,96 +36,20 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy // namely start and empty tags if ($token->type !== 'start' && $token->type !== 'empty') continue; - // copy out attributes for easy manipulation - $attr = $token->attr; - - // do global transformations (pre) - // nothing currently utilizes this - foreach ($definition->info_attr_transform_pre as $transform) { - $attr = $transform->transform($attr, $config, $context); - } - - // do local transformations only applicable to this element (pre) - // ex.

to

- foreach ($definition->info[$token->name]->attr_transform_pre - as $transform - ) { - $attr = $transform->transform($attr, $config, $context); - } + // skip tokens that are armored + if (!empty($token->armor['ValidateAttributes'])) continue; - // create alias to this element's attribute definition array, see - // also $d_defs (global attribute definition array) - // DEFINITION CALL - $defs = $definition->info[$token->name]->attr; + // note that we have no facilities here for removing tokens + $validator->validateToken($token, $config, $context); - // iterate through all the attribute keypairs - // Watch out for name collisions: $key has previously been used - foreach ($attr as $attr_key => $value) { - - // call the definition - if ( isset($defs[$attr_key]) ) { - // there is a local definition defined - if ($defs[$attr_key] === false) { - // We've explicitly been told not to allow this element. - // This is usually when there's a global definition - // that must be overridden. - // Theoretically speaking, we could have a - // AttrDef_DenyAll, but this is faster! - $result = false; - } else { - // validate according to the element's definition - $result = $defs[$attr_key]->validate( - $value, $config, $context - ); - } - } elseif ( isset($d_defs[$attr_key]) ) { - // there is a global definition defined, validate according - // to the global definition - $result = $d_defs[$attr_key]->validate( - $value, $config, $context - ); - } else { - // system never heard of the attribute? DELETE! 
- $result = false; - } - - // put the results into effect - if ($result === false || $result === null) { - // remove the attribute - unset($attr[$attr_key]); - } elseif (is_string($result)) { - // simple substitution - $attr[$attr_key] = $result; - } - - // we'd also want slightly more complicated substitution - // involving an array as the return value, - // although we're not sure how colliding attributes would - // resolve (certain ones would be completely overriden, - // others would prepend themselves). - } - - // post transforms - - // ex. to - foreach ($definition->info_attr_transform_post as $transform) { - $attr = $transform->transform($attr, $config, $context); - } - - // ex. to - foreach ($definition->info[$token->name]->attr_transform_post as $transform) { - $attr = $transform->transform($attr, $config, $context); - } - - // commit changes - // could interfere with flyweight implementation - $tokens[$key]->attr = $attr; + $tokens[$key] = $token; // for PHP 4 } + $context->destroy('IDAccumulator'); + $context->destroy('CurrentToken'); return $tokens; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform.php b/lib/htmlpurifier/HTMLPurifier/TagTransform.php index f5dc5c97b6..f5de99ce49 100644 --- a/lib/htmlpurifier/HTMLPurifier/TagTransform.php +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform.php @@ -24,6 +24,17 @@ class HTMLPurifier_TagTransform trigger_error('Call to abstract function', E_USER_ERROR); } + /** + * Prepends CSS properties to the style attribute, creating the + * attribute if it doesn't exist. + * @warning Copied over from AttrTransform, be sure to keep in sync + * @param $attr Attribute array to process (passed by reference) + * @param $css CSS to prepend + */ + function prependCSS(&$attr, $css) { + $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; + $attr['style'] = $css . 
$attr['style']; + } + } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php deleted file mode 100644 index 571bb9df4d..0000000000 --- a/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php +++ /dev/null @@ -1,34 +0,0 @@ -type == 'end') { - $new_tag = new HTMLPurifier_Token_End($this->transform_to); - return $new_tag; - } - $attr = $tag->attr; - $prepend_css = 'text-align:center;'; - if (isset($attr['style'])) { - $attr['style'] = $prepend_css . $attr['style']; - } else { - $attr['style'] = $prepend_css; - } - $new_tag = $tag->copy(); - $new_tag->name = $this->transform_to; - $new_tag->attr = $attr; - return $new_tag; - } -} - -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php index dedaf8b245..5bc22df9e3 100644 --- a/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php @@ -39,7 +39,8 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform function transform($tag, $config, &$context) { if ($tag->type == 'end') { - $new_tag = new HTMLPurifier_Token_End($this->transform_to); + $new_tag = $tag->copy(); + $new_tag->name = $this->transform_to; return $new_tag; } @@ -91,4 +92,3 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php index 6ffd0eabbb..0b5a84d480 100644 --- a/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php @@ -3,24 +3,34 @@ require_once 'HTMLPurifier/TagTransform.php'; /** - * Simple transformation, just change tag name to something else. + * Simple transformation, just change tag name to something else, + * and possibly add some styling. 
This will cover most of the deprecated + * tag cases. */ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform { + var $style; + /** * @param $transform_to Tag name to transform to. + * @param $style CSS style to add to the tag */ - function HTMLPurifier_TagTransform_Simple($transform_to) { + function HTMLPurifier_TagTransform_Simple($transform_to, $style = null) { $this->transform_to = $transform_to; + $this->style = $style; } function transform($tag, $config, &$context) { $new_tag = $tag->copy(); $new_tag->name = $this->transform_to; + if (!is_null($this->style) && + ($new_tag->type == 'start' || $new_tag->type == 'empty') + ) { + $this->prependCSS($new_tag->attr, $this->style); + } return $new_tag; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Token.php b/lib/htmlpurifier/HTMLPurifier/Token.php index 555e76f1b2..bd859d7f47 100644 --- a/lib/htmlpurifier/HTMLPurifier/Token.php +++ b/lib/htmlpurifier/HTMLPurifier/Token.php @@ -11,13 +11,21 @@ */ class HTMLPurifier_Token { var $type; /**< Type of node to bypass is_a(). @public */ + var $line; /**< Line number node was on in source document. Null if unknown. @public */ + + /** + * Lookup array of processing that this token is exempt from. + * Currently, valid values are "ValidateAttributes" and + * "MakeWellFormed_TagClosedError" + */ + var $armor = array(); /** * Copies the tag into a new one (clone substitute). * @return Copied token */ function copy() { - trigger_error('Cannot copy abstract class', E_USER_ERROR); + return unserialize(serialize($this)); } } @@ -58,7 +66,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract * @param $name String name. * @param $attr Associative array of attributes. */ - function HTMLPurifier_Token_Tag($name, $attr = array()) { + function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) { $this->name = ctype_lower($name) ? 
$name : strtolower($name); foreach ($attr as $key => $value) { // normalization only necessary when key is not lowercase @@ -73,6 +81,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract } } $this->attr = $attr; + $this->line = $line; } } @@ -82,9 +91,6 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag { var $type = 'start'; - function copy() { - return new HTMLPurifier_Token_Start($this->name, $this->attr); - } } /** @@ -93,9 +99,6 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag { var $type = 'empty'; - function copy() { - return new HTMLPurifier_Token_Empty($this->name, $this->attr); - } } /** @@ -108,9 +111,6 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag { var $type = 'end'; - function copy() { - return new HTMLPurifier_Token_End($this->name); - } } /** @@ -135,12 +135,10 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token * * @param $data String parsed character data. */ - function HTMLPurifier_Token_Text($data) { + function HTMLPurifier_Token_Text($data, $line = null) { $this->data = $data; $this->is_whitespace = ctype_space($data); - } - function copy() { - return new HTMLPurifier_Token_Text($this->data); + $this->line = $line; } } @@ -157,12 +155,9 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token * * @param $data String comment data. 
*/ - function HTMLPurifier_Token_Comment($data) { + function HTMLPurifier_Token_Comment($data, $line = null) { $this->data = $data; - } - function copy() { - return new HTMLPurifier_Token_Comment($this->data); + $this->line = $line; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TokenFactory.php b/lib/htmlpurifier/HTMLPurifier/TokenFactory.php index 25cc4122a2..d15ee1a9e1 100644 --- a/lib/htmlpurifier/HTMLPurifier/TokenFactory.php +++ b/lib/htmlpurifier/HTMLPurifier/TokenFactory.php @@ -93,4 +93,3 @@ class HTMLPurifier_TokenFactory } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URI.php b/lib/htmlpurifier/HTMLPurifier/URI.php new file mode 100644 index 0000000000..ed7ffdd6a5 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URI.php @@ -0,0 +1,119 @@ +scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); + $this->userinfo = $userinfo; + $this->host = $host; + $this->port = is_null($port) ? $port : (int) $port; + $this->path = $path; + $this->query = $query; + $this->fragment = $fragment; + } + + /** + * Retrieves a scheme object corresponding to the URI's scheme/default + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Scheme object appropriate for validating this URI + */ + function getSchemeObj($config, &$context) { + $registry =& HTMLPurifier_URISchemeRegistry::instance(); + if ($this->scheme !== null) { + $scheme_obj = $registry->getScheme($this->scheme, $config, $context); + if (!$scheme_obj) return false; // invalid scheme, clean it out + } else { + // no scheme: retrieve the default one + $def = $config->getDefinition('URI'); + $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); + if (!$scheme_obj) { + // something funky happened to the default scheme object + trigger_error( + 'Default scheme object "' . $def->defaultScheme . 
'" was not readable', + E_USER_WARNING + ); + return false; + } + } + return $scheme_obj; + } + + /** + * Generic validation method applicable for all schemes + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return True if validation/filtering succeeds, false if failure + */ + function validate($config, &$context) { + + // validate host + if (!is_null($this->host)) { + $host_def = new HTMLPurifier_AttrDef_URI_Host(); + $this->host = $host_def->validate($this->host, $config, $context); + if ($this->host === false) $this->host = null; + } + + // validate port + if (!is_null($this->port)) { + if ($this->port < 1 || $this->port > 65535) $this->port = null; + } + + // query and fragment are quite simple in terms of definition: + // *( pchar / "/" / "?" ), so define their validation routines + // when we start fixing percent encoding + + // path gets to be validated against a hodge-podge of rules depending + // on the status of authority and scheme, but it's not that important, + // esp. since it won't be applicable to everyone + + return true; + + } + + /** + * Convert URI back to string + * @return String URI appropriate for output + */ + function toString() { + // reconstruct authority + $authority = null; + if (!is_null($this->host)) { + $authority = ''; + if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; + $authority .= $this->host; + if(!is_null($this->port)) $authority .= ':' . $this->port; + } + + // reconstruct the result + $result = ''; + if (!is_null($this->scheme)) $result .= $this->scheme . ':'; + if (!is_null($authority)) $result .= '//' . $authority; + $result .= $this->path; + if (!is_null($this->query)) $result .= '?' . $this->query; + if (!is_null($this->fragment)) $result .= '#' . 
$this->fragment; + + return $result; + } + + /** + * Returns a copy of the URI object + */ + function copy() { + return unserialize(serialize($this)); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/URIDefinition.php b/lib/htmlpurifier/HTMLPurifier/URIDefinition.php new file mode 100644 index 0000000000..45c505edb0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIDefinition.php @@ -0,0 +1,145 @@ + + Unique identifier for a custom-built URI definition. If you want + to add custom URIFilters, you must specify this value. + This directive has been available since 2.1.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'DefinitionRev', 1, 'int', ' +

+ Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. This directive has been available + since 2.1.0. +

+'); + +// informative URI directives + +HTMLPurifier_ConfigSchema::define( + 'URI', 'DefaultScheme', 'http', 'string', ' +

+ Defines through what scheme the output will be served, in order to + select the proper object validator when no scheme information is present. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'Host', null, 'string/null', ' +

+ Defines the domain name of the server, so we can determine whether or + an absolute URI is from your website or not. Not strictly necessary, + as users should be using relative URIs to reference resources on your + website. It will, however, let you use absolute URIs to link to + subdomains of the domain you post here: i.e. example.com will allow + sub.example.com. However, higher up domains will still be excluded: + if you set %URI.Host to sub.example.com, example.com will be blocked. + Note: This directive overrides %URI.Base because + a given page may be on a sub-domain, but you wish HTML Purifier to be + more relaxed and allow some of the parent domains too. + This directive has been available since 1.2.0. +

+'); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'Base', null, 'string/null', ' +

+ The base URI is the URI of the document this purified HTML will be + inserted into. This information is important if HTML Purifier needs + to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute + is on. You may use a non-absolute URI for this value, but behavior + may vary (%URI.MakeAbsolute deals nicely with both absolute and + relative paths, but forwards-compatibility is not guaranteed). + Warning: If set, the scheme on this URI + overrides the one specified by %URI.DefaultScheme. This directive has + been available since 2.1.0. +

+'); + +class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition +{ + + var $type = 'URI'; + var $filters = array(); + var $registeredFilters = array(); + + /** + * HTMLPurifier_URI object of the base specified at %URI.Base + */ + var $base; + + /** + * String host to consider "home" base + */ + var $host; + + /** + * Name of default scheme based on %URI.DefaultScheme and %URI.Base + */ + var $defaultScheme; + + function HTMLPurifier_URIDefinition() { + $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); + $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); + $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); + $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); + } + + function registerFilter($filter) { + $this->registeredFilters[$filter->name] = $filter; + } + + function addFilter($filter, $config) { + $filter->prepare($config); + $this->filters[$filter->name] = $filter; + } + + function doSetup($config) { + $this->setupMemberVariables($config); + $this->setupFilters($config); + } + + function setupFilters($config) { + foreach ($this->registeredFilters as $name => $filter) { + $conf = $config->get('URI', $name); + if ($conf !== false && $conf !== null) { + $this->addFilter($filter, $config); + } + } + unset($this->registeredFilters); + } + + function setupMemberVariables($config) { + $this->host = $config->get('URI', 'Host'); + $base_uri = $config->get('URI', 'Base'); + if (!is_null($base_uri)) { + $parser = new HTMLPurifier_URIParser(); + $this->base = $parser->parse($base_uri); + $this->defaultScheme = $this->base->scheme; + if (is_null($this->host)) $this->host = $this->base->host; + } + if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme'); + } + + function filter(&$uri, $config, &$context) { + foreach ($this->filters as $name => $x) { + $result = $this->filters[$name]->filter($uri, $config, $context); + if (!$result) return false; + } + 
return true; + } + +} diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/HTMLPurifier/URIFilter.php new file mode 100644 index 0000000000..e0066f3bf0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIFilter.php @@ -0,0 +1,24 @@ +get('URI', 'Host'); + if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); + } + function filter(&$uri, $config, &$context) { + if (is_null($uri->host)) return true; + if ($this->ourHostParts === false) return false; + $host_parts = array_reverse(explode('.', $uri->host)); + foreach ($this->ourHostParts as $i => $x) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $this->ourHostParts[$i]) return false; + } + return true; + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter/DisableExternalResources.php b/lib/htmlpurifier/HTMLPurifier/URIFilter/DisableExternalResources.php new file mode 100644 index 0000000000..dc00e74110 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIFilter/DisableExternalResources.php @@ -0,0 +1,26 @@ +get('EmbeddedURI', true)) return true; + return parent::filter($uri, $config, $context); + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/HTMLPurifier/URIFilter/HostBlacklist.php new file mode 100644 index 0000000000..d3429d5cbf --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIFilter/HostBlacklist.php @@ -0,0 +1,28 @@ +moo.com will catch moo.com.example.com. '. + 'This directive has been available since 1.3.0.' 
+); + +class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter +{ + var $name = 'HostBlacklist'; + var $blacklist = array(); + function prepare($config) { + $this->blacklist = $config->get('URI', 'HostBlacklist'); + } + function filter(&$uri, $config, &$context) { + foreach($this->blacklist as $blacklisted_host_fragment) { + if (strpos($uri->host, $blacklisted_host_fragment) !== false) { + return false; + } + } + return true; + } +} diff --git a/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php b/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php new file mode 100644 index 0000000000..9935dc6ee9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIFilter/MakeAbsolute.php @@ -0,0 +1,115 @@ + + Converts all URIs into absolute forms. This is useful when the HTML + being filtered assumes a specific base path, but will actually be + viewed in a different context (and setting an alternate base URI is + not possible). %URI.Base must be set for this directive to work. + This directive has been available since 2.1.0. +

+'); + +class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter +{ + var $name = 'MakeAbsolute'; + var $base; + var $basePathStack = array(); + function prepare($config) { + $def = $config->getDefinition('URI'); + $this->base = $def->base; + if (is_null($this->base)) { + trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_ERROR); + return; + } + $this->base->fragment = null; // fragment is invalid for base URI + $stack = explode('/', $this->base->path); + array_pop($stack); // discard last segment + $stack = $this->_collapseStack($stack); // do pre-parsing + $this->basePathStack = $stack; + } + function filter(&$uri, $config, &$context) { + if (is_null($this->base)) return true; // abort early + if ( + $uri->path === '' && is_null($uri->scheme) && + is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment) + ) { + // reference to current document + $uri = $this->base->copy(); + return true; + } + if (!is_null($uri->scheme)) { + // absolute URI already: don't change + if (!is_null($uri->host)) return true; + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj->hierarchical) { + // non-hierarchal URI with explicit scheme, don't change + return true; + } + // special case: had a scheme but always is hierarchical and had no authority + } + if (!is_null($uri->host)) { + // network path, don't bother + return true; + } + if ($uri->path === '') { + $uri->path = $this->base->path; + }elseif ($uri->path[0] !== '/') { + // relative path, needs more complicated processing + $stack = explode('/', $uri->path); + $new_stack = array_merge($this->basePathStack, $stack); + $new_stack = $this->_collapseStack($new_stack); + $uri->path = implode('/', $new_stack); + } + // re-combine + $uri->scheme = $this->base->scheme; + if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo; + if (is_null($uri->host)) $uri->host = $this->base->host; + if (is_null($uri->port)) 
$uri->port = $this->base->port; + return true; + } + + /** + * Resolve dots and double-dots in a path stack + * @private + */ + function _collapseStack($stack) { + $result = array(); + for ($i = 0; isset($stack[$i]); $i++) { + $is_folder = false; + // absorb an internally duplicated slash + if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue; + if ($stack[$i] == '..') { + if (!empty($result)) { + $segment = array_pop($result); + if ($segment === '' && empty($result)) { + // error case: attempted to back out too far: + // restore the leading slash + $result[] = ''; + } elseif ($segment === '..') { + $result[] = '..'; // cannot remove .. with .. + } + } else { + // relative path, preserve the double-dots + $result[] = '..'; + } + $is_folder = true; + continue; + } + if ($stack[$i] == '.') { + // silently absorb + $is_folder = true; + continue; + } + $result[] = $stack[$i]; + } + if ($is_folder) $result[] = ''; + return $result; + } +} + diff --git a/lib/htmlpurifier/HTMLPurifier/URIParser.php b/lib/htmlpurifier/HTMLPurifier/URIParser.php new file mode 100644 index 0000000000..dff7e28ef8 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIParser.php @@ -0,0 +1,62 @@ +\'"]+):)?'. // 2. Scheme + '(//([^/?#<>\'"]*))?'. // 4. Authority + '([^?#<>\'"]*)'. // 5. Path + '(\?([^#<>\'"]*))?'. // 7. Query + '(#([^<>\'"]*))?'. // 8. Fragment + '!'; + + $matches = array(); + $result = preg_match($r_URI, $uri, $matches); + + if (!$result) return false; // *really* invalid URI + + // seperate out parts + $scheme = !empty($matches[1]) ? $matches[2] : null; + $authority = !empty($matches[3]) ? $matches[4] : null; + $path = $matches[5]; // always present, can be empty + $query = !empty($matches[6]) ? $matches[7] : null; + $fragment = !empty($matches[8]) ? $matches[9] : null; + + // further parse authority + if ($authority !== null) { + // ridiculously inefficient: it's a stacked regex! 
+ $HEXDIG = '[A-Fa-f0-9]'; + $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] + $sub_delims = '!$&\'()'; // needs [] + $pct_encoded = "%$HEXDIG$HEXDIG"; + $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*"; + $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; + $matches = array(); + preg_match($r_authority, $authority, $matches); + $userinfo = !empty($matches[1]) ? $matches[2] : null; + $host = !empty($matches[3]) ? $matches[3] : ''; + $port = !empty($matches[4]) ? (int) $matches[5] : null; + } else { + $port = $host = $userinfo = null; + } + + return new HTMLPurifier_URI( + $scheme, $userinfo, $host, $port, $path, $query, $fragment); + } + +} + diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/HTMLPurifier/URIScheme.php index 20a9781b48..41c02f70d2 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme.php @@ -19,26 +19,25 @@ class HTMLPurifier_URIScheme */ var $browsable = false; + /** + * Whether or not the URI always uses , resolves edge cases + * with making relative URIs absolute + */ + var $hierarchical = false; + /** * Validates the components of a URI * @note This implementation should be called by children if they define * a default port, as it does port processing. 
- * @note Fragment is omitted as that is scheme independent - * @param $userinfo User info found before at sign in authority - * @param $host Hostname in authority - * @param $port Port found after colon in authority - * @param $path Path of URI - * @param $query Query of URI, found after question mark + * @param $uri Instance of HTMLPurifier_URI * @param $config HTMLPurifier_Config object * @param $context HTMLPurifier_Context object + * @return Bool success or failure */ - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - if ($this->default_port == $port) $port = null; - return array($userinfo, $host, $port, $path, $query); + function validate(&$uri, $config, &$context) { + if ($this->default_port == $uri->port) $uri->port = null; + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php index dab9c981c4..5555ef33a1 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php @@ -9,37 +9,36 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { var $default_port = 21; var $browsable = true; // usually + var $hierarchical = true; - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - list($userinfo, $host, $port, $path, $query) = - parent::validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context ); - $semicolon_pos = strrpos($path, ';'); // reverse + function validate(&$uri, $config, &$context) { + parent::validate($uri, $config, $context); + $uri->query = null; + + // typecode check + $semicolon_pos = strrpos($uri->path, ';'); // reverse if ($semicolon_pos !== false) { - // typecode check - $type = substr($path, $semicolon_pos + 1); // no semicolon - $path = substr($path, 0, $semicolon_pos); + $type = substr($uri->path, $semicolon_pos + 1); // no semicolon + $uri->path = 
substr($uri->path, 0, $semicolon_pos); $type_ret = ''; if (strpos($type, '=') !== false) { // figure out whether or not the declaration is correct list($key, $typecode) = explode('=', $type, 2); if ($key !== 'type') { // invalid key, tack it back on encoded - $path .= '%3B' . $type; + $uri->path .= '%3B' . $type; } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { $type_ret = ";type=$typecode"; } } else { - $path .= '%3B' . $type; + $uri->path .= '%3B' . $type; } - $path = str_replace(';', '%3B', $path); - $path .= $type_ret; + $uri->path = str_replace(';', '%3B', $uri->path); + $uri->path .= $type_ret; } - return array($userinfo, $host, $port, $path, null); + + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php index 54b250da52..7abc6680fd 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php @@ -9,16 +9,13 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme { var $default_port = 80; var $browsable = true; + var $hierarchical = true; - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - list($userinfo, $host, $port, $path, $query) = - parent::validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context ); - return array(null, $host, $port, $path, $query); + function validate(&$uri, $config, &$context) { + parent::validate($uri, $config, $context); + $uri->userinfo = null; + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php index 7f896592de..bbd69b9c42 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php @@ -11,4 +11,3 @@ class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http { } -?> \ No newline at 
end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php index 2292072eea..f6acc6af61 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php @@ -15,16 +15,14 @@ class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme { var $browsable = false; - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - list($userinfo, $host, $port, $path, $query) = - parent::validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context ); + function validate(&$uri, $config, &$context) { + parent::validate($uri, $config, $context); + $uri->userinfo = null; + $uri->host = null; + $uri->port = null; // we need to validate path against RFC 2368's addr-spec - return array(null, null, null, $path, $query); + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php index c9d1c2b0c7..87bda63c7f 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php @@ -9,16 +9,15 @@ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme { var $browsable = false; - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - list($userinfo, $host, $port, $path, $query) = - parent::validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context ); + function validate(&$uri, $config, &$context) { + parent::validate($uri, $config, $context); + $uri->userinfo = null; + $uri->host = null; + $uri->port = null; + $uri->query = null; // typecode check needed on path - return array(null, null, null, $path, null); + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php index 
49fca4c3bb..caa85b260f 100644 --- a/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php @@ -10,15 +10,12 @@ class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme { var $default_port = 119; var $browsable = false; - function validateComponents( - $userinfo, $host, $port, $path, $query, $config, &$context - ) { - list($userinfo, $host, $port, $path, $query) = - parent::validateComponents( - $userinfo, $host, $port, $path, $query, $config, $context ); - return array(null, $host, $port, $path, null); + function validate(&$uri, $config, &$context) { + parent::validate($uri, $config, $context); + $uri->userinfo = null; + $uri->query = null; + return true; } } -?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php b/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php index d840068a3f..8cddb7f934 100644 --- a/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php +++ b/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php @@ -79,12 +79,14 @@ class HTMLPurifier_URISchemeRegistry } if (isset($this->schemes[$scheme])) return $this->schemes[$scheme]; - if (empty($this->_dir)) $this->_dir = dirname(__FILE__) . '/URIScheme/'; + if (empty($this->_dir)) $this->_dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/URIScheme/'; if (!isset($allowed_schemes[$scheme])) return $null; - @include_once $this->_dir . $scheme . '.php'; + // this bit of reflection is not very efficient, and a bit + // hacky too $class = 'HTMLPurifier_URIScheme_' . $scheme; + if (!class_exists($class)) include_once $this->_dir . $scheme . 
'.php'; if (!class_exists($class)) return $null; $this->schemes[$scheme] = new $class(); return $this->schemes[$scheme]; @@ -101,4 +103,4 @@ class HTMLPurifier_URISchemeRegistry } -?> + diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt index 34af69a1c1..f1c12eb84a 100644 --- a/lib/htmlpurifier/readme_moodle.txt +++ b/lib/htmlpurifier/readme_moodle.txt @@ -1,7 +1,7 @@ -Description of HTML Purifier v1.6.1 library import into Moodle +Description of HTML Purifier v2.1.1 Lite library import into Moodle Changes: - * Text.php - added nolink, tex and algebra tags + * Text.php - added nolink, tex, lang and algebra tags skodak diff --git a/lib/weblib.php b/lib/weblib.php index 824e9ea550..952d0f5cfc 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -1865,10 +1865,13 @@ function purify_html($text) { static $purifier = false; if (!$purifier) { + make_upload_directory('cache/htmlpurifier', false); require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.auto.php'; $config = HTMLPurifier_Config::createDefault(); $config->set('Core', 'AcceptFullDocuments', false); - //$config->set('HTML', 'Strict', true); + $config->set('Core', 'Encoding', 'UTF-8'); + $config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); + $config->set('Cache', 'SerializerPath', $CFG->dataroot.'/cache/htmlpurifier'); $config->set('URI', 'AllowedSchemes', array('http'=>1, 'https'=>1, 'ftp'=>1, 'irc'=>1, 'nntp'=>1, 'news'=>1, 'rtsp'=>1, 'teamspeak'=>1, 'gopher'=>1, 'mms'=>1)); $purifier = new HTMLPurifier($config); }