From 14913ca4d56c7f511c8729c3dd10032ca63b5157 Mon Sep 17 00:00:00 2001 From: skodak Date: Mon, 19 May 2008 06:24:33 +0000 Subject: [PATCH] MDL-14894 html purifier updated, merged from MOODLE_19_STABLE --- lib/htmlpurifier/HTMLPurifier.php | 9 ++- lib/htmlpurifier/HTMLPurifier/AttrDef.php | 8 +++ lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php | 15 +++- .../HTMLPurifier/AttrDef/CSS/Background.php | 3 + .../HTMLPurifier/AttrDef/CSS/Border.php | 2 +- .../HTMLPurifier/AttrDef/CSS/Color.php | 22 +++--- .../AttrDef/CSS/DenyElementDecorator.php | 26 +++++++ lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php | 12 +--- .../HTMLPurifier/AttrDef/URI/Host.php | 20 ++++-- .../HTMLPurifier/CSSDefinition.php | 6 +- lib/htmlpurifier/HTMLPurifier/Config.php | 2 +- .../HTMLPurifier/DefinitionCache.php | 3 + .../HTMLPurifier/DefinitionCacheFactory.php | 29 ++++++-- lib/htmlpurifier/HTMLPurifier/ElementDef.php | 2 +- lib/htmlpurifier/HTMLPurifier/Encoder.php | 16 ++++- .../HTMLPurifier/HTMLDefinition.php | 8 ++- .../HTMLPurifier/IDAccumulator.php | 6 +- lib/htmlpurifier/HTMLPurifier/Language.php | 7 ++ .../Language/messages/en-x-testmini.php | 11 +++ .../HTMLPurifier/LanguageFactory.php | 47 +++++++------ .../HTMLPurifier/Lexer/DOMLex.php | 21 +++++- .../HTMLPurifier/Lexer/DirectLex.php | 9 +-- .../HTMLPurifier/PercentEncoder.php | 65 +++++++++++++++-- .../HTMLPurifier/Strategy/MakeWellFormed.php | 5 +- lib/htmlpurifier/HTMLPurifier/URI.php | 70 ++++++++++++++++--- lib/htmlpurifier/HTMLPurifier/URIParser.php | 41 ++++++----- lib/htmlpurifier/readme_moodle.txt | 2 +- 27 files changed, 355 insertions(+), 112 deletions(-) create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-testmini.php diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php index e9dfe5f404..a7bba317e5 100644 --- a/lib/htmlpurifier/HTMLPurifier.php +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -22,7 
+22,7 @@ */ /* - HTML Purifier 2.1.3 - Standards Compliant HTML Filtering + HTML Purifier 2.1.4 - Standards Compliant HTML Filtering Copyright (C) 2006-2007 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -83,7 +83,7 @@ since 2.0.0. class HTMLPurifier { - var $version = '2.1.3'; + var $version = '2.1.4'; var $config; var $filters = array(); @@ -213,7 +213,7 @@ class HTMLPurifier * @param $prototype Optional prototype HTMLPurifier instance to * overload singleton with. */ - function &getInstance($prototype = null) { + function &instance($prototype = null) { static $htmlpurifier; if (!$htmlpurifier || $prototype) { if (is_a($prototype, 'HTMLPurifier')) { @@ -227,6 +227,9 @@ class HTMLPurifier return $htmlpurifier; } + function &getInstance($prototype = null) { + return HTMLPurifier::instance($prototype); + } } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef.php b/lib/htmlpurifier/HTMLPurifier/AttrDef.php index 882b626043..e94ee713d2 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef.php @@ -82,5 +82,13 @@ class HTMLPurifier_AttrDef return $this; } + /** + * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work + * properly. THIS IS A HACK! 
+ */ + function mungeRgb($string) { + return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); + } + } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php index d0f49bc4ad..71523be1f1 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php @@ -38,7 +38,20 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef list($property, $value) = explode(':', $declaration, 2); $property = trim($property); $value = trim($value); - if (!isset($definition->info[$property])) continue; + $ok = false; + do { + if (isset($definition->info[$property])) { + $ok = true; + break; + } + if (ctype_lower($property)) break; + $property = strtolower($property); + if (isset($definition->info[$property])) { + $ok = true; + break; + } + } while(0); + if (!$ok) continue; // inefficient call, since the validator will do this again if (strtolower(trim($value)) !== 'inherit') { // inherit works for everything (but only on the base property) diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php index b82e98e581..a5c1046a82 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php @@ -31,6 +31,9 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef $string = $this->parseCDATA($string); if ($string === '') return false; + // munge rgb() decl if necessary + $string = $this->mungeRgb($string); + // assumes URI doesn't have spaces in it $bits = explode(' ', strtolower($string)); // bits to process diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php index f6d4d684e3..4eb3e25abd 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php @@ -22,7 +22,7 @@ class 
HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef function validate($string, $config, &$context) { $string = $this->parseCDATA($string); - // we specifically will not support rgb() syntax with spaces + $string = $this->mungeRgb($string); $bits = explode(' ', $string); $done = array(); // segments we've finished $ret = ''; // return value diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php index 30b38f9293..a6711f7175 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php @@ -39,20 +39,13 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); $color = trim($color); - if (!$color) return false; + if ($color === '') return false; $lower = strtolower($color); if (isset($colors[$lower])) return $colors[$lower]; - if ($color[0] === '#') { - // hexadecimal handling - $hex = substr($color, 1); - $length = strlen($hex); - if ($length !== 3 && $length !== 6) return false; - if (!ctype_xdigit($hex)) return false; - } else { + if (strpos($color, 'rgb(') !== false) { // rgb literal handling - if (strpos($color, 'rgb(')) return false; $length = strlen($color); if (strpos($color, ')') !== $length - 1) return false; $triad = substr($color, 4, $length - 4 - 1); @@ -90,6 +83,17 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef } $new_triad = implode(',', $new_parts); $color = "rgb($new_triad)"; + } else { + // hexadecimal handling + if ($color[0] === '#') { + $hex = substr($color, 1); + } else { + $hex = $color; + $color = '#' . 
$color; + } + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; } return $color; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php new file mode 100644 index 0000000000..b0a6db9dee --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php @@ -0,0 +1,26 @@ +def =& $def; + $this->element = $element; + } + /** + * Checks if CurrentToken is set and equal to $this->element + */ + function validate($string, $config, $context) { + $token = $context->get('CurrentToken', true); + if ($token && $token->name == $this->element) return false; + return $this->def->validate($string, $config, $context); + } +} diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php index 0e9a5f4739..52b4193b98 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php @@ -68,7 +68,7 @@ HTMLPurifier_ConfigSchema::define( class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef { - var $parser, $percentEncoder; + var $parser; var $embedsResource; /** @@ -76,7 +76,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef */ function HTMLPurifier_AttrDef_URI($embeds_resource = false) { $this->parser = new HTMLPurifier_URIParser(); - $this->percentEncoder = new HTMLPurifier_PercentEncoder(); $this->embedsResource = (bool) $embeds_resource; } @@ -84,9 +83,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if ($config->get('URI', 'Disable')) return false; - // initial operations $uri = $this->parseCDATA($uri); - $uri = $this->percentEncoder->normalize($uri); // parse the URI $uri = $this->parser->parse($uri); @@ -122,13 +119,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $context->destroy('EmbeddedURI'); if (!$ok) return false; - // munge scheme off if necessary (this 
must be last) - if (!is_null($uri->scheme) && is_null($uri->host)) { - if ($uri_def->defaultScheme == $uri->scheme) { - $uri->scheme = null; - } - } - // back to string $result = $uri->toString(); diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php index ac729ebd93..4812ad1d3d 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php @@ -40,11 +40,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef $ipv4 = $this->ipv4->validate($string, $config, $context); if ($ipv4 !== false) return $ipv4; - // validate a domain name here, do filtering, etc etc etc + // A regular domain name. - // We could use this, but it would break I18N domain names - //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string); - //if (!$match) return false; + // This breaks I18N domain names, but we don't have proper IRI support, + // so force users to insert Punycode. If there's complaining we'll + // try to fix things into an international friendly form. + + // The productions describing this are: + $a = '[a-z]'; // alpha + $an = '[a-z0-9]'; // alphanum + $and = '[a-z0-9-]'; // alphanum | "-" + // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + $domainlabel = "$an($and*$an)?"; + // toplabel = alpha | alpha *( alphanum | "-" ) alphanum + $toplabel = "$a($and*$an)?"; + // hostname = *( domainlabel "." ) toplabel [ "." 
] + $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); + if (!$match) return false; return $string; } diff --git a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php index 2acf7cf83b..2fc73b905d 100644 --- a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php @@ -7,6 +7,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php'; require_once 'HTMLPurifier/AttrDef/CSS/Border.php'; require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; require_once 'HTMLPurifier/AttrDef/CSS/Composite.php'; +require_once 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php'; require_once 'HTMLPurifier/AttrDef/CSS/Font.php'; require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php'; require_once 'HTMLPurifier/AttrDef/CSS/Length.php'; @@ -176,12 +177,13 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition )); $this->info['width'] = - $this->info['height'] = + $this->info['height'] = + new HTMLPurifier_AttrDef_CSS_DenyElementDecorator( new HTMLPurifier_AttrDef_CSS_Composite(array( new HTMLPurifier_AttrDef_CSS_Length(true), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('auto')) - )); + )), 'img'); $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php index 203542f0aa..1c043aeb71 100644 --- a/lib/htmlpurifier/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/HTMLPurifier/Config.php @@ -42,7 +42,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - var $version = '2.1.3'; + var $version = '2.1.4'; /** * Two-level associative array of configuration directives diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php index d4c9d239f2..5b14fdfe4d 100644 --- a/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php +++ 
b/lib/htmlpurifier/HTMLPurifier/DefinitionCache.php @@ -120,6 +120,9 @@ class HTMLPurifier_DefinitionCache /** * Clears all expired (older version or revision) objects from cache + * @note Be careful implementing this method as flush. Flush must + * not interfere with other Definition types, and cleanup() + * should not be repeatedly called by userland code. */ function cleanup($config) { trigger_error('Cannot call abstract method', E_USER_ERROR); diff --git a/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php b/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php index acc661828a..dead92a32e 100644 --- a/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php +++ b/lib/htmlpurifier/HTMLPurifier/DefinitionCacheFactory.php @@ -1,6 +1,7 @@ array()); + var $implementations = array(); var $decorators = array(); /** @@ -51,14 +49,21 @@ class HTMLPurifier_DefinitionCacheFactory return $instance; } + /** + * Registers a new definition cache object + * @param $short Short name of cache object, for reference + * @param $long Full class name of cache object, for construction + */ + function register($short, $long) { + $this->implementations[$short] = $long; + } + /** * Factory method that creates a cache object based on configuration * @param $name Name of definitions handled by cache * @param $config Instance of HTMLPurifier_Config */ function &create($type, $config) { - // only one implementation as for right now, $config will - // be used to determine implementation $method = $config->get('Cache', 'DefinitionImpl'); if ($method === null) { $null = new HTMLPurifier_DefinitionCache_Null($type); @@ -67,7 +72,17 @@ class HTMLPurifier_DefinitionCacheFactory if (!empty($this->caches[$method][$type])) { return $this->caches[$method][$type]; } - $cache = new HTMLPurifier_DefinitionCache_Serializer($type); + if ( + isset($this->implementations[$method]) && + class_exists($class = $this->implementations[$method]) + ) { + $cache = new $class($type); + } else { + if 
($method != 'Serializer') { + trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); + } + $cache = new HTMLPurifier_DefinitionCache_Serializer($type); + } foreach ($this->decorators as $decorator) { $new_cache = $decorator->decorate($cache); // prevent infinite recursion in PHP 4 diff --git a/lib/htmlpurifier/HTMLPurifier/ElementDef.php b/lib/htmlpurifier/HTMLPurifier/ElementDef.php index 21e1a5a764..b6439d1a5b 100644 --- a/lib/htmlpurifier/HTMLPurifier/ElementDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php @@ -82,7 +82,7 @@ class HTMLPurifier_ElementDef /** * List of the names of required attributes this element has. Dynamically - * populated. + * populated by HTMLPurifier_HTMLDefinition::getElement * @public */ var $required_attr = array(); diff --git a/lib/htmlpurifier/HTMLPurifier/Encoder.php b/lib/htmlpurifier/HTMLPurifier/Encoder.php index e5adf83f59..31ebb785ff 100644 --- a/lib/htmlpurifier/HTMLPurifier/Encoder.php +++ b/lib/htmlpurifier/HTMLPurifier/Encoder.php @@ -62,6 +62,11 @@ class HTMLPurifier_Encoder trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); } + /** + * Error-handler that mutes errors, alternative to shut-up operator. + */ + function muteErrorHandler() {} + /** * Cleans a UTF-8 string for well-formedness and SGML validity * @@ -106,9 +111,18 @@ class HTMLPurifier_Encoder static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); + // UTF-8 validity is checked since PHP 4.3.5 + // This is an optimization: if the string is already valid UTF-8, no + // need to do iconv/php stuff. 99% of the time, this will be the case. 
+ if (preg_match('/^.{1}/us', $str)) { + return strtr($str, $non_sgml_chars); + } + if ($iconv && !$force_php) { // do the shortcut way - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + $str = iconv('UTF-8', 'UTF-8//IGNORE', $str); + restore_error_handler(); return strtr($str, $non_sgml_chars); } diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php index e13e0c62b0..51367ca403 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -222,6 +222,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition /** * Adds a custom attribute to a pre-existing element + * @note This is strictly convenience, and does not have a corresponding + * method in HTMLPurifier_HTMLModule * @param $element_name String element name to add attribute to * @param $attr_name String name of attribute * @param $def Attribute definition, can be string or object, see @@ -229,7 +231,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition */ function addAttribute($element_name, $attr_name, $def) { $module =& $this->getAnonymousModule(); - $element =& $module->addBlankElement($element_name); + if (!isset($module->info[$element_name])) { + $element =& $module->addBlankElement($element_name); + } else { + $element =& $module->info[$element_name]; + } $element->attr[$attr_name] = $def; } diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php index 60715afc1e..e746e565bb 100644 --- a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php +++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php @@ -28,9 +28,9 @@ class HTMLPurifier_IDAccumulator * @static */ function build($config, &$context) { - $id_accumulator = new HTMLPurifier_IDAccumulator(); - $id_accumulator->load($config->get('Attr', 'IDBlacklist')); - return $id_accumulator; + 
$acc = new HTMLPurifier_IDAccumulator(); + $acc->load($config->get('Attr', 'IDBlacklist')); + return $acc; } /** diff --git a/lib/htmlpurifier/HTMLPurifier/Language.php b/lib/htmlpurifier/HTMLPurifier/Language.php index c9a3c20fe2..c0833b7f79 100644 --- a/lib/htmlpurifier/HTMLPurifier/Language.php +++ b/lib/htmlpurifier/HTMLPurifier/Language.php @@ -25,6 +25,13 @@ class HTMLPurifier_Language */ var $errorNames = array(); + /** + * True if no message file was found for this language, so English + * is being used instead. Check this if you'd like to notify the + * user that they've used a non-supported language. + */ + var $error = false; + /** * Has the language object been loaded yet? * @private diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-testmini.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-testmini.php new file mode 100644 index 0000000000..4b16cd20a9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-testmini.php @@ -0,0 +1,11 @@ + 'HTML Purifier XNone' +); + diff --git a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php index 9d26cd7037..715c3fee9a 100644 --- a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php +++ b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php @@ -16,6 +16,7 @@ This directive has been available since 2.0.0. * caching and fallbacks. * @note Thanks to MediaWiki for the general logic, although this version * has been entirely rewritten + * @todo Serialized cache for languages */ class HTMLPurifier_LanguageFactory { @@ -89,40 +90,42 @@ class HTMLPurifier_LanguageFactory * Creates a language object, handles class fallbacks * @param $config Instance of HTMLPurifier_Config * @param $context Instance of HTMLPurifier_Context + * @param $code Code to override configuration with. Private parameter. 
*/ - function create($config, &$context) { + function create($config, &$context, $code = false) { // validate language code - $code = $this->validator->validate( - $config->get('Core', 'Language'), $config, $context - ); + if ($code === false) { + $code = $this->validator->validate( + $config->get('Core', 'Language'), $config, $context + ); + } else { + $code = $this->validator->validate($code, $config, $context); + } if ($code === false) $code = 'en'; // malformed code becomes English $pcode = str_replace('-', '_', $code); // make valid PHP classname static $depth = 0; // recursion protection if ($code == 'en') { - $class = 'HTMLPurifier_Language'; - $file = $this->dir . '/Language.php'; + $lang = new HTMLPurifier_Language($config, $context); } else { $class = 'HTMLPurifier_Language_' . $pcode; $file = $this->dir . '/Language/classes/' . $code . '.php'; - // PHP5/APC deps bug workaround can go here - // you can bypass the conditional include by loading the - // file yourself - if (file_exists($file) && !class_exists($class)) { - include_once $file; - } - } - - if (!class_exists($class)) { - // go fallback - $fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code); - $depth++; - $lang = HTMLPurifier_LanguageFactory::factory( $fallback ); - $depth--; - } else { - $lang = new $class($config, $context); + if (file_exists($file)) { + include $file; + $lang = new $class($config, $context); + } else { + // Go fallback + $raw_fallback = $this->getFallbackFor($code); + $fallback = $raw_fallback ? 
$raw_fallback : 'en'; + $depth++; + $lang = $this->create($config, $context, $fallback); + if (!$raw_fallback) { + $lang->error = true; + } + $depth--; + } } $lang->code = $code; diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php index 56bd4a4828..9aef335ba8 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php @@ -90,10 +90,27 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer $tokens[] = $this->factory->createText($node->data); return; } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { - // undo DOM's special treatment of