From e0ac8448c79df889c91e2bb454fd1a9dafbcdd53 Mon Sep 17 00:00:00 2001
From: skodak
HTML Purifier - lib/htmlpurifier
++diff --git a/lib/htmlpurifier/CREDITS b/lib/htmlpurifier/CREDITS new file mode 100644 index 0000000000..c3e7bb8e2f --- /dev/null +++ b/lib/htmlpurifier/CREDITS @@ -0,0 +1,7 @@ + +CREDITS + +Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks +to the DevNetwork Community for their help (see docs/ref-devnetwork.html for +more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake +for letting me package his fantastic XSS cheatsheet for a smoketest. diff --git a/lib/htmlpurifier/HTMLPurifier.auto.php b/lib/htmlpurifier/HTMLPurifier.auto.php new file mode 100644 index 0000000000..a66fd2e25d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.auto.php @@ -0,0 +1,10 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.func.php b/lib/htmlpurifier/HTMLPurifier.func.php new file mode 100644 index 0000000000..876ad7b298 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.func.php @@ -0,0 +1,21 @@ +purify($html, $config); +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php new file mode 100644 index 0000000000..5a0ce99d0b --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -0,0 +1,170 @@ +config = HTMLPurifier_Config::create($config); + + $this->lexer = HTMLPurifier_Lexer::create(); + $this->strategy = new HTMLPurifier_Strategy_Core(); + $this->generator = new HTMLPurifier_Generator(); + + } + + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + function addFilter($filter) { + $this->filters[] = $filter; + } + + /** + * Filters an HTML snippet/document to be XSS-free and standards-compliant. + * + * @param $html String of HTML to purify + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. 
The parameter can also be any type + * that HTMLPurifier_Config::create() supports. + * @return Purified HTML + */ + function purify($html, $config = null) { + + $config = $config ? HTMLPurifier_Config::create($config) : $this->config; + + $context = new HTMLPurifier_Context(); + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + + for ($i = 0, $size = count($this->filters); $i < $size; $i++) { + $html = $this->filters[$i]->preFilter($html, $config, $context); + } + + // purified HTML + $html = + $this->generator->generateFromTokens( + // list of tokens + $this->strategy->execute( + // list of un-purified tokens + $this->lexer->tokenizeHTML( + // un-purified HTML + $html, $config, $context + ), + $config, $context + ), + $config, $context + ); + + for ($i = $size - 1; $i >= 0; $i--) { + $html = $this->filters[$i]->postFilter($html, $config, $context); + } + + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); + $this->context =& $context; + return $html; + } + + /** + * Filters an array of HTML snippets + * @param $config Optional HTMLPurifier_Config object for this operation. + * See HTMLPurifier::purify() for more details. 
+ * @return Array of purified HTML + */ + function purifyArray($array_of_html, $config = null) { + $context_array = array(); + foreach ($array_of_html as $key => $html) { + $array_of_html[$key] = $this->purify($html, $config); + $context_array[$key] = $this->context; + } + $this->context = $context_array; + return $array_of_html; + } + + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrCollections.php b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php new file mode 100644 index 0000000000..8318abb15c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php @@ -0,0 +1,100 @@ +info; + // load extensions from the modules + foreach ($modules as $module) { + foreach ($module->attr_collections as $coll_i => $coll) { + foreach ($coll as $attr_i => $attr) { + if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) { + // merge in includes + $info[$coll_i][$attr_i] = array_merge( + $info[$coll_i][$attr_i], $attr); + continue; + } + $info[$coll_i][$attr_i] = $attr; + } + } + } + // perform internal expansions and inclusions + foreach ($info as $name => $attr) { + // merge attribute collections that include others + $this->performInclusions($info[$name]); + // replace string identifiers with actual attribute objects + $this->expandIdentifiers($info[$name], $attr_types); + } + } + + /** + * Takes a reference to an attribute associative array and performs + * all inclusions specified by the zero index. 
+ * @param &$attr Reference to attribute array
+ * @note Inclusions are followed transitively: when an included
+ *       collection still carries its own zero-index list, those names
+ *       are queued as well. Every collection is visited at most once,
+ *       so repeated or circular inclusions cannot loop forever, and
+ *       names of collections that do not exist are skipped.
+ */
+ function performInclusions(&$attr) {
+     if (!isset($attr[0])) return;
+     $merge = $attr[0];
+     $seen = array(); // names already merged, guards against cycles
+     // loop through all the inclusions; $merge may grow while iterating
+     for ($i = 0; isset($merge[$i]); $i++) {
+         $name = $merge[$i];
+         if (isset($seen[$name])) continue;
+         $seen[$name] = true;
+         // an unknown collection has nothing to contribute
+         if (!isset($this->info[$name])) continue;
+         // foreach attribute of the inclusion, copy it over
+         foreach ($this->info[$name] as $key => $value) {
+             if (isset($attr[$key])) continue; // also catches more inclusions
+             $attr[$key] = $value;
+         }
+         if (isset($this->info[$name][0])) {
+             // recursion: queue the inclusions of the included collection
+             $merge = array_merge($merge, $this->info[$name][0]);
+         }
+     }
+     unset($attr[0]);
+ }
+
+ /**
+ * Expands all string identifiers in an attribute array by replacing
+ * them with the appropriate values inside HTMLPurifier_AttrTypes.
+ * Entries set to false are removed, the zero index is ignored and any
+ * other non-string value is left as it is. A string that names no
+ * known attribute type raises an error and is removed as well.
+ * @param &$attr Reference to attribute array
+ * @param $attr_types HTMLPurifier_AttrTypes instance
+ */
+ function expandIdentifiers(&$attr, $attr_types) {
+     foreach ($attr as $def_i => $def) {
+         if ($def_i === 0) continue;
+         // must be tested before is_string(): false is not a string, so
+         // the removal would otherwise never be reached
+         if ($def === false) {
+             unset($attr[$def_i]);
+             continue;
+         }
+         if (!is_string($def)) continue;
+         if (isset($attr_types->info[$def])) {
+             $attr[$def_i] = $attr_types->info[$def];
+         } else {
+             trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
+             unset($attr[$def_i]);
+         }
+     }
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef.php b/lib/htmlpurifier/HTMLPurifier/AttrDef.php
new file mode 100644
index 0000000000..334a7acedd
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef.php
@@ -0,0 +1,67 @@
+ by removing
+ * leading and trailing whitespace, ignoring line feeds, and replacing
+ * carriage returns and tabs with spaces. While most useful for HTML
+ * attributes specified as CDATA, it can also be applied to most CSS
+ * values.
+ *
+ * @note This method is not entirely standards compliant, as trim() removes
+ * more types of whitespace than specified in the spec. 
In practice, + * this is rarely a problem, as those extra characters usually have + * already been removed by HTMLPurifier_Encoder. + * + * @warning This processing is inconsistent with XML's whitespace handling + * as specified by section 3.3.3 and referenced XHTML 1.0 section + * 4.7. Compliant processing requires all line breaks normalized + * to "\n", so the fix is not as simple as fixing it in this + * function. Trim and whitespace collapsing are supposed to only + * occur in NMTOKENs. However, note that we are NOT necessarily + * parsing XML, thus, this behavior may still be correct. + * + * @public + */ + function parseCDATA($string) { + $string = trim($string); + $string = str_replace("\n", '', $string); + $string = str_replace(array("\r", "\t"), ' ', $string); + return $string; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php new file mode 100644 index 0000000000..220ec0d0d1 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php @@ -0,0 +1,69 @@ +parseCDATA($css); + + $definition = $config->getCSSDefinition(); + + // we're going to break the spec and explode by semicolons. 
+ // This is because semicolon rarely appears in escaped form + // Doing this is generally flaky but fast + // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI + // for details + + $declarations = explode(';', $css); + $propvalues = array(); + + foreach ($declarations as $declaration) { + if (!$declaration) continue; + if (!strpos($declaration, ':')) continue; + list($property, $value) = explode(':', $declaration, 2); + $property = trim($property); + $value = trim($value); + if (!isset($definition->info[$property])) continue; + // inefficient call, since the validator will do this again + if (strtolower(trim($value)) !== 'inherit') { + // inherit works for everything (but only on the base property) + $result = $definition->info[$property]->validate( + $value, $config, $context ); + } else { + $result = 'inherit'; + } + if ($result === false) continue; + $propvalues[$property] = $result; + } + + // procedure does not write the new CSS simultaneously, so it's + // slightly inefficient, but it's the only way of getting rid of + // duplicates. Perhaps config to optimize it, but not now. + + $new_declarations = ''; + foreach ($propvalues as $prop => $value) { + $new_declarations .= "$prop:$value;"; + } + + return $new_declarations ? 
$new_declarations : false; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php new file mode 100644 index 0000000000..42d8bcf0e6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php @@ -0,0 +1,87 @@ +getCSSDefinition(); + $this->info['background-color'] = $def->info['background-color']; + $this->info['background-image'] = $def->info['background-image']; + $this->info['background-repeat'] = $def->info['background-repeat']; + $this->info['background-attachment'] = $def->info['background-attachment']; + $this->info['background-position'] = $def->info['background-position']; + } + + function validate($string, $config, &$context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['color'] = false; + $caught['image'] = false; + $caught['repeat'] = false; + $caught['attachment'] = false; + $caught['position'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($key != 'position') { + if ($status !== false) continue; + $r = $this->info['background-' . $key]->validate($bit, $config, $context); + } else { + $r = $bit; + } + if ($r === false) continue; + if ($key == 'position') { + if ($caught[$key] === false) $caught[$key] = ''; + $caught[$key] .= $r . 
' '; + } else { + $caught[$key] = $r; + } + $i++; + break; + } + } + + if (!$i) return false; + if ($caught['position'] !== false) { + $caught['position'] = $this->info['background-position']-> + validate($caught['position'], $config, $context); + } + + $ret = array(); + foreach ($caught as $value) { + if ($value === false) continue; + $ret[] = $value; + } + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php new file mode 100644 index 0000000000..77a3ddd6e3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -0,0 +1,130 @@ + |Standards-compliant HTML filter library.
+
+
+ CVS version: 1.60
+ Copyright (C) 2006 Edward Z. Yang
+ License: GNU LGPL
+ URL: http://hp.jpsband.org/
+
This can greatly '. + 'improve readability for editors who are hand-editing the HTML, but is '. + 'by no means necessary as HTML Purifier has already fixed all major '. + 'errors the HTML may have had. Tidy is a non-default extension, and this directive '. + 'will silently fail if Tidy is not available.
If you are looking to make '. + 'the overall look of your page\'s source better, I recommend running Tidy '. + 'on the entire page rather than just user-content (after all, the '. + 'indentation relative to the containing blocks will be incorrect).
This '. + 'directive was available since 1.1.1.
'
+);
+
+/**
+ * Generates HTML from tokens.
+ */
+class HTMLPurifier_Generator
+{
+
+    /**
+     * Bool cache of %Core.CleanUTF8DuringGeneration
+     * @private
+     */
+    var $_clean_utf8 = false;
+
+    /**
+     * Bool cache of %Core.XHTML
+     * @private
+     */
+    var $_xhtml = true;
+
+    /**
+     * Generates HTML from an array of tokens. When %Core.TidyFormat is
+     * enabled and the tidy extension is loaded, the result is additionally
+     * run through Tidy.
+     * @param $tokens Array of HTMLPurifier_Token
+     * @param $config HTMLPurifier_Config object; a default configuration
+     *                is created when a false-like value is passed
+     * @param $context Reference to the context; not read by this method
+     * @return Generated HTML, an empty string when there are no tokens
+     */
+    function generateFromTokens($tokens, $config, &$context) {
+        $html = '';
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
+        // refresh the cached directives before any token is rendered
+        $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
+        $this->_xhtml = $config->get('Core', 'XHTML');
+        if (!$tokens) return '';
+        foreach ($tokens as $token) {
+            $html .= $this->generateFromToken($token);
+        }
+        if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
+
+            $tidy_options = array(
+                'indent'=> true,
+                'output-xhtml' => $this->_xhtml,
+                'show-body-only' => true,
+                'indent-spaces' => 2,
+                'wrap' => 68,
+            );
+            // below PHP 5 the procedural tidy functions are used,
+            // otherwise the Tidy class
+            if (version_compare(PHP_VERSION, '5', '<')) {
+                tidy_set_encoding('utf8');
+                foreach ($tidy_options as $key => $value) {
+                    tidy_setopt($key, $value);
+                }
+                tidy_parse_string($html);
+                tidy_clean_repair();
+                $html = tidy_get_output();
+            } else {
+                $tidy = new Tidy;
+                $tidy->parseString($html, $tidy_options, 'utf8');
+                $tidy->cleanRepair();
+                $html = (string) $tidy;
+            }
+        }
+        return $html;
+    }
+
+    /**
+     * Generates HTML from a single token: an opening tag for 'start',
+     * a closing tag for 'end', a self-contained tag for 'empty' and
+     * escaped character data for 'text'. A token without a type, or with
+     * any other type, produces an empty string.
+     * @param $token HTMLPurifier_Token object.
+     * @return Generated HTML
+     */
+    function generateFromToken($token) {
+        if (!isset($token->type)) return '';
+        if ($token->type == 'start') {
+            $attr = $this->generateAttributes($token->attr);
+            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
+
+        } elseif ($token->type == 'end') {
+            // closing tag: the leading "</" is required, without it the
+            // element name would leak into the output as text
+            return '</' . $token->name . '>';
+
+        } elseif ($token->type == 'empty') {
+            $attr = $this->generateAttributes($token->attr);
+            return '<' . $token->name . ($attr ? 
' ' : '') . $attr . + ( $this->_xhtml ? ' /': '' ) + . '>'; + + } elseif ($token->type == 'text') { + return $this->escape($token->data); + + } else { + return ''; + + } + } + + /** + * Generates attribute declarations from attribute array. + * @param $assoc_array_of_attributes Attribute array + * @return Generate HTML fragment for insertion. + */ + function generateAttributes($assoc_array_of_attributes) { + $html = ''; + foreach ($assoc_array_of_attributes as $key => $value) { + if (!$this->_xhtml) { + // remove namespaced attributes + if (strpos($key, ':') !== false) continue; + // also needed: check for attribute minimization + } + $html .= $key.'="'.$this->escape($value).'" '; + } + return rtrim($html); + } + + /** + * Escapes raw text data. + * @param $string String data to escape for HTML. + * @return String escaped data. + */ + function escape($string) { + if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string); + return htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php new file mode 100644 index 0000000000..3af445ceb0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -0,0 +1,281 @@ +<blockquote>Foo</blockquote> '. + 'would become<blockquote><p>Foo</p></blockquote>
. The '.
+ '<p>
tags can be replaced '.
+ 'with whatever you desire, as long as it is a block level element. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.Parent with the default 'div'; the directive is read
+// back through $config->get('HTML', 'Parent') when the definition is set up.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Parent', 'div', 'string',
+    'String name of element that HTML fragment passed to library will be '.
+    'inserted in. An interesting variation would be using span as the '.
+    'parent element, meaning that only inline tags would be allowed. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.AllowedElements; the null default leaves the element
+// whitelist switched off (the consumer only acts on an array value).
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedElements', null, 'lookup/null',
+    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+    'can overload it with your own list of tags to allow. Note that this '.
+    'method is subtractive: it does its job by taking away from HTML Purifier\'s '.
+    'usual feature set, so you cannot add a tag that HTML Purifier never '.
+    'supported in the first place (like embed, form or head). If you change this, you '.
+    'probably also want to change %HTML.AllowedAttributes. '.
+    'Warning: If another directive conflicts with the '.
+    'elements here, that directive will win and override. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.AllowedAttributes; keys use the 'tag.attr' / '*.attr'
+// syntax described in the text, and the null default disables the whitelist.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedAttributes', null, 'lookup/null',
+    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+    '(style, id, class, dir, lang, xml:lang). '.
+    'Warning: If another directive conflicts with the '.
+    'elements here, that directive will win and override. For '.
+    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+    'directive. You must set that directive to true before you can use '.
+    'IDs at all. This directive has been available since 1.3.0.'
+);
+
+/**
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ *
+ * Conventions:
+ *
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ *
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ *
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
+ */
+class HTMLPurifier_HTMLDefinition
+{
+
+ /** FULLY-PUBLIC VARIABLES */
+
+ /**
+ * Associative array of element names to HTMLPurifier_ElementDef
+ * @public
+ */
+ var $info = array();
+
+ /**
+ * Associative array of global attribute name to attribute definition.
+ * @public
+ */
+ var $info_global_attr = array();
+
+ /**
+ * String name of parent element HTML will be going into.
+ * @public
+ */
+ var $info_parent = 'div';
+
+ /**
+ * Definition for parent element, allows parent element to be a
+ * tag that's not allowed inside the HTML fragment.
+ * @public
+ */
+ var $info_parent_def;
+
+ /**
+ * String name of element used to wrap inline elements in block context
+ * @note This is rarely used except for BLOCKQUOTEs in strict mode
+ * @public
+ */
+ var $info_block_wrapper = 'p';
+
+ /**
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+ * @public
+ */
+ var $info_tag_transform = array();
+
+ /**
+ * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
+ * @public
+ */
+ var $info_attr_transform_pre = array();
+
+ /**
+ * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
+ * @public
+ */
+ var $info_attr_transform_post = array();
+
+ /**
+ * Nested lookup array of content set name (Block, Inline) to
+ * element name to whether or not it belongs in that content set.
+ * @public
+ */
+ var $info_content_sets = array();
+
+
+
+ /** PUBLIC BUT INTERNAL VARIABLES */
+
+ var $setup = false; /**< Has setup() been called yet? */
+ var $config; /**< Temporary instance of HTMLPurifier_Config */
+
+ var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
+
+ /**
+ * Constructor: cheap bootstrap only. Creates the module manager and
+ * keeps a reference to the configuration; both are consumed later by
+ * setup().
+ * @param $config Instance of HTMLPurifier_Config, stored by reference
+ */
+ function HTMLPurifier_HTMLDefinition(&$config) {
+     $this->manager = new HTMLPurifier_HTMLModuleManager();
+     $this->config =& $config;
+ }
+
+ /**
+ * Turns the raw module data into the form the rest of HTML Purifier
+ * consumes. Only the first call does any work; later calls return
+ * immediately. The definition should not be modified afterwards.
+ */
+ function setup() {
+
+     // multiple call guard
+     if ($this->setup) {
+         return;
+     }
+     $this->setup = true;
+
+     $this->processModules();
+     $this->setupConfigStuff();
+
+     // the temporary helpers are dropped once everything is in place
+     unset($this->config, $this->manager);
+
+ }
+
+ /**
+ * Sets up the module manager and copies its results onto this object:
+ * tag and attribute transforms of the active modules, the element
+ * definitions and the content set lookup.
+ */
+ function processModules() {
+
+     $this->manager->setup($this->config);
+
+     // transforms are merged by key, so an entry of a later module
+     // replaces an earlier entry stored under the same key
+     foreach ($this->manager->activeModules as $module) {
+         foreach ($module->info_tag_transform as $key => $transform) {
+             $this->info_tag_transform[$key] = $transform;
+         }
+         foreach ($module->info_attr_transform_pre as $key => $transform) {
+             $this->info_attr_transform_pre[$key] = $transform;
+         }
+         foreach ($module->info_attr_transform_post as $key => $transform) {
+             $this->info_attr_transform_post[$key] = $transform;
+         }
+     }
+
+     $this->info = $this->manager->getElements($this->config);
+     $this->info_content_sets = $this->manager->contentSets->lookup;
+
+ }
+
+ /**
+ * Sets up stuff based on config. We need a better way of doing this.
+ *
+ * Reads %HTML.BlockWrapper, %HTML.Parent, %HTML.AllowedElements and
+ * %HTML.AllowedAttributes and applies them to this definition. Invalid
+ * wrapper/parent values raise E_USER_ERROR; whitelist entries that
+ * match nothing raise E_USER_WARNING.
+ */
+ function setupConfigStuff() {
+
+     // the wrapper has to belong to the Block content set, otherwise
+     // info_block_wrapper is left at its current value
+     $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
+     if (isset($this->info_content_sets['Block'][$block_wrapper])) {
+         $this->info_block_wrapper = $block_wrapper;
+     } else {
+         trigger_error('Cannot use non-block element as block wrapper.',
+             E_USER_ERROR);
+     }
+
+     // the parent is looked up through the manager; when the configured
+     // name is unknown, the definition of the current info_parent is
+     // loaded instead
+     $parent = $this->config->get('HTML', 'Parent');
+     $def = $this->manager->getElement($parent, $this->config);
+     if ($def) {
+         $this->info_parent = $parent;
+         $this->info_parent_def = $def;
+     } else {
+         trigger_error('Cannot use unrecognized element as parent.',
+             E_USER_ERROR);
+         $this->info_parent_def = $this->manager->getElement(
+             $this->info_parent, $this->config);
+     }
+
+     // support template text
+     $support = "(for information on implementing this, see the ".
+                "support forums) ";
+
+     // setup allowed elements, SubtractiveWhitelist module
+     $allowed_elements = $this->config->get('HTML', 'AllowedElements');
+     if (is_array($allowed_elements)) {
+         foreach ($this->info as $name => $d) {
+             if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+             // whatever remains afterwards named no known element
+             unset($allowed_elements[$name]);
+         }
+         // emit errors
+         foreach ($allowed_elements as $element => $d) {
+             trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
+         }
+     }
+
+     $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
+     $allowed_attributes_mutable = $allowed_attributes; // by copy!
+     if (is_array($allowed_attributes)) {
+         // lookups use the untouched list; matched keys are struck from
+         // the copy so that the leftovers can be reported below
+         foreach ($this->info_global_attr as $attr_key => $info) {
+             if (!isset($allowed_attributes["*.$attr_key"])) {
+                 unset($this->info_global_attr[$attr_key]);
+             } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
+                 unset($allowed_attributes_mutable["*.$attr_key"]);
+             }
+         }
+         foreach ($this->info as $tag => $info) {
+             foreach ($info->attr as $attr => $attr_info) {
+                 if (!isset($allowed_attributes["$tag.$attr"]) &&
+                     !isset($allowed_attributes["*.$attr"])) {
+                     unset($this->info[$tag]->attr[$attr]);
+                 } else {
+                     if (isset($allowed_attributes_mutable["$tag.$attr"])) {
+                         unset($allowed_attributes_mutable["$tag.$attr"]);
+                     } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
+                         unset($allowed_attributes_mutable["*.$attr"]);
+                     }
+                 }
+             }
+         }
+         // emit errors
+         foreach ($allowed_attributes_mutable as $elattr => $d) {
+             // split on the first dot only, so the rest of the key stays
+             // intact in the message
+             $bits = explode('.', $elattr, 2);
+             if (count($bits) == 2) {
+                 list($element, $attribute) = $bits;
+             } else {
+                 // key without an element part: report it as a global
+                 // attribute instead of reading a missing offset
+                 $element = '*';
+                 $attribute = $bits[0];
+             }
+             if ($element == '*') {
+                 trigger_error("Global attribute '$attribute' is not ".
+                     "supported in any elements $support",
+                     E_USER_WARNING);
+             } else {
+                 trigger_error("Attribute '$attribute' in element '$element' not supported $support",
+                     E_USER_WARNING);
+             }
+         }
+     }
+
+ }
+
+
+}
+
+?>
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php
new file mode 100644
index 0000000000..930b605d11
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php
@@ -0,0 +1,125 @@
+info, since the object's data is only info,
+ * with extra behavior associated with it.
+ * @public
+ */
+ var $attr_collections = array();
+
+ /**
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+ * @public
+ */
+ var $info_tag_transform = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed before validation.
+ * @public
+ */
+ var $info_attr_transform_pre = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed after validation.
+ * @public
+ */
+ var $info_attr_transform_post = array();
+
+ /**
+ * Boolean flag that indicates whether or not getChildDef is implemented.
+ * For optimization reasons: may save a call to a function. Be sure
+ * to set it if you do implement getChildDef(), otherwise it will have
+ * no effect!
+ * @public
+ */
+ var $defines_child_def = false;
+
+ /**
+ * Hook for building the HTMLPurifier_ChildDef subclass that matches
+ * the content_model and content_model_type member variables of an
+ * HTMLPurifier_ElementDef. This default implementation builds
+ * nothing and always answers false.
+ * @param $def HTMLPurifier_ElementDef instance
+ * @return HTMLPurifier_ChildDef subclass, false in this implementation
+ * @public
+ */
+ function getChildDef($def) {
+     return false;
+ }
+
+ /**
+ * Hook that gives the module a chance to work on the
+ * HTMLPurifier_HTMLDefinition before the module gets processed.
+ * Does nothing in this implementation.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function preProcess(&$definition) {
+     // intentionally empty
+ }
+
+ /**
+ * Hook that gives the module a chance to work on the
+ * HTMLPurifier_HTMLDefinition after the module gets processed.
+ * Does nothing in this implementation.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function postProcess(&$definition) {
+     // intentionally empty
+ }
+
+ /**
+ * Hook that is invoked when the module gets registered to the
+ * definition. Does nothing in this implementation.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function setup(&$definition) {
+     // intentionally empty
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
new file mode 100644
index 0000000000..17e5e987fd
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
@@ -0,0 +1,43 @@
+ 'bdo');
+ var $attr_collections = array(
+ 'I18N' => array('dir' => false)
+ );
+
+ /**
+ * Constructor: registers the dir enumeration in the I18N attribute
+ * collection and builds the definition of the bdo element.
+ */
+ function HTMLPurifier_HTMLModule_Bdo() {
+     // dir accepts exactly ltr or rtl
+     $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
+     $this->attr_collections['I18N']['dir'] = $dir;
+
+     $bdo = new HTMLPurifier_ElementDef();
+     // The Abstract Module specification has the attribute inclusions
+     // wrong for bdo: bdo allows xml:lang too (and lang is tossed in for
+     // good measure; it is not allowed for XHTML 1.1, which will be
+     // managed with a global attribute transform)
+     $bdo->attr = array(
+         0 => array('Core', 'Lang'),
+         'dir' => $dir, // required
+     );
+     $bdo->content_model = '#PCDATA | Inline';
+     $bdo->content_model_type = 'optional';
+     // dir is required, so this transform supplies the fallback
+     // behavior when it is missing
+     $bdo->attr_transform_post['required-dir'] =
+         new HTMLPurifier_AttrTransform_BdoDir();
+
+     $this->info['bdo'] = $bdo;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php
new file mode 100644
index 0000000000..8f17c2f0a3
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php
@@ -0,0 +1,31 @@
+ array(
+ 0 => array('Style'),
+ // 'xml:space' => false,
+ 'class' => 'NMTOKENS',
+ 'id' => 'ID',
+ 'title' => 'CDATA',
+ ),
+ 'Lang' => array(
+ 'xml:lang' => false, // see constructor
+ ),
+ 'I18N' => array(
+ 0 => array('Lang'), // proprietary, for xml:lang/lang
+ ),
+ 'Common' => array(
+ 0 => array('Core', 'I18N')
+ )
+ );
+
+ /**
+ * Constructor: swaps the false placeholder declared for xml:lang in
+ * the Lang collection for a real language attribute definition.
+ */
+ function HTMLPurifier_HTMLModule_CommonAttributes() {
+     $lang = new HTMLPurifier_AttrDef_Lang();
+     $this->attr_collections['Lang']['xml:lang'] = $lang;
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
new file mode 100644
index 0000000000..6a415906e6
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
@@ -0,0 +1,46 @@
+ 'del | ins');
+
+ /**
+ * Constructor: gives every element listed in $this->elements the same
+ * definition with a 'chameleon' content model.
+ */
+ function HTMLPurifier_HTMLModule_Edit() {
+     foreach ($this->elements as $name) {
+         $def = new HTMLPurifier_ElementDef();
+         $def->attr = array(
+             0 => array('Common'),
+             'cite' => 'URI',
+             // 'datetime' => 'Datetime' // Datetime not implemented
+         );
+         // Two models in one string: inline context first, block context
+         // second, separated by an exclamation mark (getChildDef splits
+         // the string on it)
+         $def->content_model = '#PCDATA | Inline ! #PCDATA | Flow';
+         // HTML 4.01 specifies that ins/del must not contain block
+         // elements when used in an inline context; chameleon is a
+         // complicated workaround to achieve this effect
+         $def->content_model_type = 'chameleon';
+         $this->info[$name] = $def;
+     }
+ }
+
+ var $defines_child_def = true;
+ /**
+ * Builds the child definition for 'chameleon' content models by
+ * splitting the content model on the exclamation mark and passing the
+ * first and second part to HTMLPurifier_ChildDef_Chameleon.
+ * @param $def HTMLPurifier_ElementDef instance
+ * @return HTMLPurifier_ChildDef_Chameleon, or false for any other
+ *         content model type
+ */
+ function getChildDef($def) {
+     if ($def->content_model_type == 'chameleon') {
+         $models = explode('!', $def->content_model);
+         return new HTMLPurifier_ChildDef_Chameleon($models[0], $models[1]);
+     }
+     return false;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
new file mode 100644
index 0000000000..e285e8ba1f
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
@@ -0,0 +1,37 @@
+ 'a');
+
+ /**
+ * Constructor: builds the definition of the a element, which takes
+ * optional inline content and excludes nested a elements.
+ */
+ function HTMLPurifier_HTMLModule_Hypertext() {
+     $anchor = new HTMLPurifier_ElementDef();
+     $anchor->attr = array(
+         0 => array('Common'),
+         // 'accesskey' => 'Character',
+         // 'charset' => 'Charset',
+         'href' => 'URI',
+         //'hreflang' => 'LanguageCode',
+         'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
+         'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
+         //'tabindex' => 'Number',
+         //'type' => 'ContentType',
+     );
+     $anchor->content_model = '#PCDATA | Inline';
+     $anchor->content_model_type = 'optional';
+     $anchor->excludes = array('a' => true);
+     $this->info['a'] = $anchor;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
new file mode 100644
index 0000000000..3852836de7
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
@@ -0,0 +1,38 @@
+ 'img');
+
+ /**
+ * Constructor: builds the definition of the img element, an empty
+ * element with an HTMLPurifier_AttrTransform_ImgRequired transform
+ * appended to its post-validation transforms.
+ */
+ function HTMLPurifier_HTMLModule_Image() {
+     $img = new HTMLPurifier_ElementDef();
+     $img->attr = array(
+         0 => array('Common'),
+         'alt' => 'Text',
+         'height' => 'Length',
+         'longdesc' => 'URI',
+         'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
+         'width' => 'Length'
+     );
+     $img->content_model_type = 'empty';
+     $img->attr_transform_post[] =
+         new HTMLPurifier_AttrTransform_ImgRequired();
+     $this->info['img'] = $img;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
new file mode 100644
index 0000000000..a0613a2f7e
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
@@ -0,0 +1,60 @@
+elements as $name) {
+ $this->info[$name] = new HTMLPurifier_ElementDef();
+ // for u, s, strike, as more elements get added, add
+ // conditionals as necessary
+ $this->info[$name]->content_model = 'Inline | #PCDATA';
+ $this->info[$name]->content_model_type = 'optional';
+ $this->info[$name]->attr[0] = array('Common');
+ }
+
+ // setup modifications to old elements
+ foreach ($this->non_standalone_elements as $name) {
+ $this->info[$name] = new HTMLPurifier_ElementDef();
+ $this->info[$name]->standalone = false;
+ }
+
+ $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+ $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+
+ $this->info['address']->content_model = 'Inline | #PCDATA | p';
+ $this->info['address']->content_model_type = 'optional';
+ $this->info['address']->child = false;
+
+ $this->info['blockquote']->content_model = 'Flow | #PCDATA';
+ $this->info['blockquote']->content_model_type = 'optional';
+ $this->info['blockquote']->child = false;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
new file mode 100644
index 0000000000..c74982df4e
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
@@ -0,0 +1,46 @@
+ 'dl | ol | ul', 'Flow' => 'List');
+
+ // Constructor: creates a definition for every element listed in
+ // $this->elements and assigns the list content models.
+ function HTMLPurifier_HTMLModule_List() {
+     foreach ($this->elements as $tag) {
+         $def = new HTMLPurifier_ElementDef();
+         $def->attr = array(0 => array('Common'));
+         switch ($tag) {
+             case 'li':
+             case 'dd':
+                 $def->content_model = '#PCDATA | Flow';
+                 $def->content_model_type = 'optional';
+                 break;
+             case 'ol':
+             case 'ul':
+                 $def->content_model = 'li';
+                 $def->content_model_type = 'required';
+                 break;
+         }
+         $this->info[$tag] = $def;
+     }
+     // definition lists: terms hold inline content, the list holds terms
+     // and descriptions
+     $this->info['dl']->content_model = 'dt | dd';
+     $this->info['dl']->content_model_type = 'required';
+     $this->info['dt']->content_model = '#PCDATA | Inline';
+     $this->info['dt']->content_model_type = 'optional';
+     // this could be a LOT more robust
+     $this->info['li']->auto_close = array('li' => true);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
new file mode 100644
index 0000000000..42d9c11e46
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
@@ -0,0 +1,41 @@
+ 'hr',
+ 'Inline' => 'b | big | i | small | sub | sup | tt'
+ );
+
+ // Constructor: every element in $this->elements gets the Common
+ // attributes; hr is empty, everything else holds optional inline content.
+ function HTMLPurifier_HTMLModule_Presentation() {
+     foreach ($this->elements as $tag) {
+         $def = new HTMLPurifier_ElementDef();
+         $def->attr = array(0 => array('Common'));
+         $is_rule = ($tag == 'hr');
+         if (!$is_rule) {
+             $def->content_model = '#PCDATA | Inline';
+         }
+         $def->content_model_type = $is_rule ? 'empty' : 'optional';
+         $this->info[$tag] = $def;
+     }
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
new file mode 100644
index 0000000000..5ee5d1cf65
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
@@ -0,0 +1,27 @@
+ array('style' => false), // see constructor
+ 'Core' => array(0 => array('Style'))
+ );
+
+ // Constructor: fills the 'style' slot of the Style attribute collection
+ // with a CSS attribute definition object.
+ function HTMLPurifier_HTMLModule_StyleAttribute() {
+     $css_def = new HTMLPurifier_AttrDef_CSS();
+     $this->attr_collections['Style']['style'] = $css_def;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
new file mode 100644
index 0000000000..ea41f5b103
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
@@ -0,0 +1,88 @@
+ 'table');
+
+ // Constructor: creates a definition for every name in $this->elements,
+ // fills in the per-element attributes, then assigns content models
+ // (and a dedicated child definition object for table).
+ function HTMLPurifier_HTMLModule_Tables() {
+ foreach ($this->elements as $e) {
+ $this->info[$e] = new HTMLPurifier_ElementDef();
+ $this->info[$e]->attr = array(0 => array('Common'));
+ // $attr is a reference to the definition's attr array, so the writes
+ // below land directly on $this->info[$e]->attr
+ $attr =& $this->info[$e]->attr;
+ // caption keeps only the Common collection
+ if ($e == 'caption') continue;
+ // table has its own attribute set and skips the shared
+ // align/valign/charoff attributes assigned at the end of the loop
+ if ($e == 'table'){
+ $attr['border'] = 'Pixels';
+ $attr['cellpadding'] = 'Length';
+ $attr['cellspacing'] = 'Length';
+ $attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
+ 'vsides', 'box', 'border'
+ ), false);
+ $attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'none', 'groups', 'rows', 'cols', 'all'
+ ), false);
+ $attr['summary'] = 'Text';
+ $attr['width'] = 'Length';
+ continue;
+ }
+ // column elements: span and width, in addition to the shared set
+ if ($e == 'col' || $e == 'colgroup') {
+ $attr['span'] = 'Number';
+ $attr['width'] = 'MultiLength';
+ }
+ // cells: abbreviation and spanning, in addition to the shared set
+ if ($e == 'td' || $e == 'th') {
+ $attr['abbr'] = 'Text';
+ $attr['colspan'] = 'Number';
+ $attr['rowspan'] = 'Number';
+ }
+ // shared by every element that reaches this point
+ // (i.e. everything except caption and table)
+ $attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'left', 'center', 'right', 'justify', 'char'
+ ), false);
+ $attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'top', 'middle', 'bottom', 'baseline'
+ ), false);
+ $attr['charoff'] = 'Length';
+ }
+ $this->info['caption']->content_model = '#PCDATA | Inline';
+ $this->info['caption']->content_model_type = 'optional';
+
+ // Is done directly because it doesn't leverage substitution
+ // mechanisms. True model is:
+ // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
+ $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
+
+ // td and th share the same content model and type
+ $this->info['td']->content_model =
+ $this->info['th']->content_model = '#PCDATA | Flow';
+ $this->info['td']->content_model_type =
+ $this->info['th']->content_model_type = 'optional';
+
+ $this->info['tr']->content_model = 'td | th';
+ $this->info['tr']->content_model_type = 'required';
+
+ $this->info['col']->content_model_type = 'empty';
+
+ $this->info['colgroup']->content_model = 'col';
+ $this->info['colgroup']->content_model_type = 'optional';
+
+ // the three row-group elements share one content model: required tr
+ $this->info['tbody']->content_model =
+ $this->info['thead']->content_model =
+ $this->info['tfoot']->content_model = 'tr';
+ $this->info['tbody']->content_model_type =
+ $this->info['thead']->content_model_type =
+ $this->info['tfoot']->content_model_type = 'required';
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
new file mode 100644
index 0000000000..bac05986c6
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
@@ -0,0 +1,78 @@
+ 'h1 | h2 | h3 | h4 | h5 | h6',
+ 'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification
+ 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
+ 'Flow' => 'Heading | Block | Inline'
+ );
+
+ // Constructor: creates a definition for every element listed in
+ // $this->elements, then sets the exclusions of pre and the list of
+ // elements that automatically close an open p.
+ function HTMLPurifier_HTMLModule_Text() {
+     foreach ($this->elements as $tag) {
+         $def = new HTMLPurifier_ElementDef();
+         if ($tag == 'br') {
+             // br: Core attributes only, no content
+             $def->attr = array(0 => array('Core'));
+             $def->content_model_type = 'empty';
+         } else {
+             // quotations additionally accept a cite URI
+             $quoting = ($tag == 'blockquote' || $tag == 'q');
+             $def->attr = $quoting
+                 ? array(0 => array('Common'), 'cite' => 'URI')
+                 : array(0 => array('Common'));
+             // content model: all remaining elements are 'optional'
+             if ($tag == 'blockquote') {
+                 $model = 'Heading | Block | List';
+             } elseif ($tag == 'div') {
+                 $model = '#PCDATA | Flow';
+             } else {
+                 $model = '#PCDATA | Inline';
+             }
+             $def->content_model = $model;
+             $def->content_model_type = 'optional';
+         }
+         $this->info[$tag] = $def;
+     }
+     // SGML permits exclusions for all descendants, but this is
+     // not possible with DTDs or XML Schemas. W3C has elected to
+     // use complicated compositions of content_models to simulate
+     // exclusion for children, but we go the simpler, SGML-style
+     // route of flat-out exclusions. Note that the Abstract Module
+     // is blithely unaware of such distinctions.
+     $pre_excluded = array(
+         'img', 'big', 'small',
+         'object', 'applet', 'font', 'basefont' // generally not allowed
+     );
+     $this->info['pre']->excludes = array_flip($pre_excluded);
+     $closes_p = array(
+         'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
+         'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
+         'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification
+     );
+     $this->info['p']->auto_close = array_flip($closes_p);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
new file mode 100644
index 0000000000..cdbe3733f2
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
@@ -0,0 +1,108 @@
+ false,
+ 'menu' => false,
+ 'dir' => false,
+ 'center'=> false
+ );
+
+ var $attr_collections = array(
+ 'Lang' => array(
+ 'lang' => false // placeholder
+ )
+ );
+
+ var $info_attr_transform_post = array(
+ 'lang' => false // placeholder
+ );
+
+ // Constructor: registers tag transforms for deprecated elements, creates
+ // non-standalone definitions for every name in $this->elements, and
+ // attaches pre-validation attribute transforms to them.
+ function HTMLPurifier_HTMLModule_TransformToStrict() {
+
+ // deprecated tag transforms
+ $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
+ $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
+ $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
+ $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+
+ // every definition created here is flagged standalone = false
+ foreach ($this->elements as $name) {
+ $this->info[$name] = new HTMLPurifier_ElementDef();
+ $this->info[$name]->standalone = false;
+ }
+
+ // deprecated attribute transforms
+ // (each chain below assigns the result of one "new" expression to
+ // several definitions)
+ $this->info['h1']->attr_transform_pre['align'] =
+ $this->info['h2']->attr_transform_pre['align'] =
+ $this->info['h3']->attr_transform_pre['align'] =
+ $this->info['h4']->attr_transform_pre['align'] =
+ $this->info['h5']->attr_transform_pre['align'] =
+ $this->info['h6']->attr_transform_pre['align'] =
+ $this->info['p'] ->attr_transform_pre['align'] =
+ new HTMLPurifier_AttrTransform_TextAlign();
+
+ // xml:lang <=> lang mirroring, implement in TransformToStrict,
+ // this is overridden in TransformToXHTML11
+ $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
+ $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
+
+ // this should not be applied to XHTML 1.0 Transitional, ONLY
+ // XHTML 1.0 Strict. We may need three classes
+ // ('strictblockquote' is the type getChildDef() in this class handles)
+ $this->info['blockquote']->content_model_type = 'strictblockquote';
+ $this->info['blockquote']->child = false; // recalculate please!
+
+ $this->info['table']->attr_transform_pre['bgcolor'] =
+ $this->info['tr']->attr_transform_pre['bgcolor'] =
+ $this->info['td']->attr_transform_pre['bgcolor'] =
+ $this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor();
+
+ $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
+
+ $this->info['img']->attr_transform_pre['name'] =
+ $this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name();
+
+ $this->info['td']->attr_transform_pre['width'] =
+ $this->info['th']->attr_transform_pre['width'] =
+ $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width');
+
+ $this->info['td']->attr_transform_pre['height'] =
+ $this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height');
+
+ }
+
+ var $defines_child_def = true;
+ // Builds the child definition for the 'strictblockquote' content model
+ // type; returns false for every other type.
+ function getChildDef($def) {
+     if ($def->content_model_type == 'strictblockquote') {
+         return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+     }
+     return false;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php
new file mode 100644
index 0000000000..0915f5b6e5
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php
@@ -0,0 +1,30 @@
+ array(
+ 'lang' => false // remove it
+ )
+ );
+
+ var $info_attr_transform_post = array(
+ 'lang' => false // remove it
+ );
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
new file mode 100644
index 0000000000..e0090472ca
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
@@ -0,0 +1,558 @@
+attrTypes = new HTMLPurifier_AttrTypes();
+
+ if (!$blank) $this->initialize();
+
+ }
+
+ // Loads the default HTML configuration: registers the built-in modules
+ // through addModule(), defines the Safe / Unsafe / Lenient / Correctional
+ // / Extension collections, marks them active or valid, and finally sets
+ // the auto doctype and auto collection.
+ // In a collection, an array stored at index 0 lists other collections to
+ // include (resolved later by processCollections()).
+ function initialize() {
+ $this->initialized = true;
+
+ // load default modules to the recognized modules list (not active)
+ $modules = array(
+ // define
+ 'CommonAttributes',
+ 'Text', 'Hypertext', 'List', 'Presentation',
+ 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
+ // define-redefine
+ 'Legacy',
+ // redefine
+ 'TransformToStrict', 'TransformToXHTML11'
+ );
+ foreach ($modules as $module) {
+ $this->addModule($module);
+ }
+
+ // Safe modules for supported doctypes. These are included
+ // in the valid and active module lists by default
+ $this->collections['Safe'] = array(
+ '_Common' => array( // leading _ indicates private
+ 'CommonAttributes', 'Text', 'Hypertext', 'List',
+ 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+ 'StyleAttribute'
+ ),
+ // HTML definitions, defer to XHTML definitions
+ 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+ 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+ // XHTML definitions
+ 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
+ 'XHTML 1.0 Strict' => array(array('_Common')),
+ 'XHTML 1.1' => array(array('_Common')),
+ );
+
+ // Modules that specify elements that are unsafe from untrusted
+ // third-parties. These should be registered in $validModules but
+ // almost never $activeModules unless you really know what you're
+ // doing.
+ $this->collections['Unsafe'] = array();
+
+ // Modules to import if lenient mode (attempt to convert everything
+ // to a valid representation) is on. These must not be in $validModules
+ // unless specified so.
+ $this->collections['Lenient'] = array(
+ 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+ 'XHTML 1.0 Strict' => array('TransformToStrict'),
+ 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
+ );
+
+ // Modules to import if correctional mode (correct everything that
+ // is feasible to strict mode) is on. These must not be in $validModules
+ // unless specified so.
+ $this->collections['Correctional'] = array(
+ 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+ 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
+ );
+
+ // User-space modules, custom code or whatever
+ $this->collections['Extension'] = array();
+
+ // setup active versus valid modules. ORDER IS IMPORTANT!
+ // definition modules
+ $this->makeCollectionActive('Safe');
+ $this->makeCollectionValid('Unsafe');
+ // redefinition modules
+ $this->makeCollectionActive('Lenient');
+ $this->makeCollectionActive('Correctional');
+
+ // from here on, addModule() also appends each new module to
+ // $this->collections['Extension']['*']
+ $this->autoDoctype = '*';
+ $this->autoCollection = 'Extension';
+
+ }
+
+ /**
+  * Registers a module in the list of recognized modules. Registration
+  * alone does not activate it: the module still has to be part of a
+  * collection, which happens automatically only when both an auto
+  * doctype and an auto collection are set.
+  * @param $module Mixed: string module name, with or without
+  *                a registered class prefix, or an instance of a
+  *                subclass of HTMLPurifier_HTMLModule.
+  */
+ function addModule($module) {
+     if (is_string($module)) {
+         $requested = $module;
+         $class = $requested;
+         if (!class_exists($class)) {
+             // try each registered prefix until a class is found
+             foreach ($this->prefixes as $prefix) {
+                 $class = $prefix . $requested;
+                 if (class_exists($class)) break;
+             }
+         }
+         if (!class_exists($class)) {
+             trigger_error($requested . ' module does not exist',
+                 E_USER_ERROR);
+             return;
+         }
+         $module = new $class();
+     }
+     // registration order is remembered and used later for sorting
+     $module->order = $this->counter++;
+     $this->modules[$module->name] = $module;
+     if ($this->autoDoctype === false || $this->autoCollection === false) {
+         return;
+     }
+     $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
+ }
+
+ /**
+  * Marks a collection as active; a collection that is not yet valid is
+  * made valid first. See $activeModules for the semantics of "active".
+  * @param $collection_name Name of collection to activate
+  */
+ function makeCollectionActive($collection_name) {
+     $is_valid = in_array($collection_name, $this->validCollections);
+     if (!$is_valid) {
+         $this->makeCollectionValid($collection_name);
+     }
+     $this->activeCollections[] = $collection_name;
+ }
+
+ /**
+ * Makes a collection valid by appending its name to $validCollections.
+ * See $validModules for the semantics of "valid".
+ * @note No duplicate check is done here; makeCollectionActive() checks
+ *       before calling this method.
+ * @param $collection_name Name of collection to mark as valid
+ */
+ function makeCollectionValid($collection_name) {
+ $this->validCollections[] = $collection_name;
+ }
+
+ /**
+ * Adds a class prefix that addModule() will use to resolve a
+ * string name to a concrete class
+ * @param $prefix Prefix to register; it is cast to string before being
+ *        appended to $prefixes
+ */
+ function addPrefix($prefix) {
+ $this->prefixes[] = (string) $prefix;
+ }
+
+ // Prepares the manager for a configuration: activates the auto
+ // collection, resolves the doctype, processes all collections, assembles
+ // the valid and active module lists, and builds the element lookup
+ // table, the content sets and the attribute collections.
+ function setup($config) {
+
+     // the auto collection (if any) takes part in this run
+     if ($this->autoCollection !== false) {
+         $this->makeCollectionActive($this->autoCollection);
+     }
+
+     // resolve the doctype, following an alias when one is registered
+     $doctype = $this->getDoctype($config);
+     if (isset($this->doctypeAliases[$doctype])) {
+         $doctype = $this->doctypeAliases[$doctype];
+     }
+     $this->doctype = $doctype;
+
+     // convert every collection to module name => module instance form
+     foreach ($this->collections as $collection_name => $unused) {
+         $this->processCollections($this->collections[$collection_name]);
+     }
+
+     $this->validModules = $this->assembleModules($this->validCollections);
+     $this->activeModules = $this->assembleModules($this->activeCollections);
+
+     // lookup table: element name => names of the valid modules defining it
+     foreach ($this->validModules as $module) {
+         foreach ($module->info as $element => $unused_def) {
+             if (!isset($this->elementLookup[$element])) {
+                 $this->elementLookup[$element] = array();
+             }
+             $this->elementLookup[$element][] = $module->name;
+         }
+     }
+
+     // Content sets are built from the VALID modules (note the different
+     // choice below). Content models that contain non-allowed elements are
+     // harmless because RemoveForeignElements will ensure they never get
+     // in anyway, and there is usually no reason to restrict a content
+     // model beyond what is mandated by the doctype. Note, however, that
+     // this means redefinitions of content models can't be tossed in
+     // validModules willy-nilly: that stuff still is regulated by
+     // configuration.
+     $this->contentSets = new HTMLPurifier_ContentSets($this->validModules);
+
+     // Attribute collections are built from the ACTIVE modules only: only
+     // explicitly allowed modules may affect the global attribute
+     // collections. This means there's a distinction between loading the
+     // Bdo module and the bdo element: Bdo will enable the dir attribute
+     // on all elements, while bdo will only define the bdo element, which
+     // will not have an editable directionality. This might catch people
+     // who are loading only elements by surprise, so we should consider
+     // loading an entire module if all the elements it defines are
+     // requested by the user, especially if it affects the global
+     // attribute collections.
+     $this->attrCollections = new HTMLPurifier_AttrCollections(
+         $this->attrTypes,
+         $this->activeModules
+     );
+
+ }
+
+ /**
+  * Merges the modules that the given collections define for the current
+  * doctype. Modules of the catch-all '*' doctype are merged in as well,
+  * unless the doctype-specific entry carries a '*' marker.
+  * @param $collections List of collection names we should grab
+  *        modules from (like 'Safe' or 'Lenient')
+  * @return Array of modules; entries merged earlier win on key clashes
+  */
+ function assembleModules($collections) {
+     $assembled = array();
+     $used = 0; // number of collections that know the current doctype
+     foreach ($collections as $name) {
+         if (!isset($this->collections[$name])) {
+             trigger_error("$name collection is undefined", E_USER_ERROR);
+             continue;
+         }
+         $by_doctype = $this->collections[$name];
+         $skip_catch_all = false;
+         if (isset($by_doctype[$this->doctype])) {
+             $own = $by_doctype[$this->doctype];
+             if (isset($own['*'])) {
+                 // marker entry: drop it and ignore the catch-all doctype
+                 unset($own['*']);
+                 $skip_catch_all = true;
+             }
+             $assembled += $own;
+             $used++;
+         }
+         // accept catch-all doctype
+         if ($skip_catch_all) continue;
+         if ($this->doctype === '*') continue;
+         if (isset($by_doctype['*'])) {
+             $assembled += $by_doctype['*'];
+         }
+     }
+
+     if ($used < 1) {
+         // possible XSS injection if user-specified doctypes
+         // are allowed
+         trigger_error("Doctype {$this->doctype} does not exist, ".
+             "check for typos (if you desire a doctype that allows ".
+             "no elements, use an empty array collection)", E_USER_ERROR);
+     }
+     return $assembled;
+ }
+
+ /**
+ * Takes a collection and performs inclusions and substitutions for it.
+ * Works in three passes over the referenced array:
+ *  1. inclusions: an array at index 0 of a collection lists other
+ *     collections whose modules are appended to it;
+ *  2. substitution: module names are replaced by the instances registered
+ *     in $this->modules, keyed by module name and sorted by module order;
+ *  3. cleanup: collections whose name starts with '_' are removed.
+ * @param $cols Reference to collections class member variable
+ */
+ function processCollections(&$cols) {
+
+ // $cols is the set of collections
+ // $col_i is the name (index) of a collection
+ // $col is a collection/list of modules
+
+ // perform inclusions
+ foreach ($cols as $col_i => $col) {
+ $seen = array();
+ if (!empty($col[0]) && is_array($col[0])) {
+ $seen[$col_i] = true; // recursion reporting
+ $includes = $col[0];
+ unset($cols[$col_i][0]); // remove inclusions value, recursion guard
+ } else {
+ $includes = array();
+ }
+ if (empty($includes)) continue;
+ // $includes may grow while it is walked: nested inclusion arrays
+ // found in included collections are appended to it below
+ for ($i = 0; isset($includes[$i]); $i++) {
+ $inc = $includes[$i];
+ // a name that was already handled for this collection
+ // (including the collection itself) is reported and skipped
+ if (isset($seen[$inc])) {
+ trigger_error(
+ "Circular inclusion detected in $col_i collection",
+ E_USER_ERROR
+ );
+ continue;
+ } else {
+ $seen[$inc] = true;
+ }
+ if (!isset($cols[$inc])) {
+ trigger_error(
+ "Collection $col_i tried to include undefined ".
+ "collection $inc", E_USER_ERROR);
+ continue;
+ }
+ foreach ($cols[$inc] as $module) {
+ if (is_array($module)) { // another inclusion!
+ foreach ($module as $inc2) $includes[] = $inc2;
+ continue;
+ }
+ $cols[$col_i][] = $module; // merge in the other modules
+ }
+ }
+ }
+
+ // replace with real modules, invert module from list to
+ // assoc array of module name to module instance
+ foreach ($cols as $col_i => $col) {
+ $ignore_global = false;
+ $order = array();
+ foreach ($col as $module_i => $module) {
+ // every original entry is removed; valid ones are re-added
+ // below under the module's name
+ unset($cols[$col_i][$module_i]);
+ if (is_array($module)) {
+ trigger_error("Illegal inclusion array at index".
+ " $module_i found collection $col_i, inclusion".
+ " arrays must be at start of collection (index 0)",
+ E_USER_ERROR);
+ continue;
+ }
+ // '*' => false is a marker, not a module; it is restored
+ // after sorting
+ if ($module_i === '*' && $module === false) {
+ $ignore_global = true;
+ continue;
+ }
+ if (!isset($this->modules[$module])) {
+ trigger_error(
+ "Collection $col_i references undefined ".
+ "module $module",
+ E_USER_ERROR
+ );
+ continue;
+ }
+ $module = $this->modules[$module];
+ $cols[$col_i][$module->name] = $module;
+ $order[$module->name] = $module->order;
+ }
+ // sort the collection's modules by ascending module order
+ array_multisort(
+ $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
+ );
+ if ($ignore_global) $cols[$col_i]['*'] = false;
+ }
+
+ // delete pseudo-collections
+ foreach ($cols as $col_i => $col) {
+ if ($col_i[0] == '_') unset($cols[$col_i]);
+ }
+
+ }
+
+ /**
+  * Determines the doctype name for a configuration.
+  * An explicit HTML.Doctype setting wins. Otherwise an uninitialized
+  * manager answers with its auto doctype (or '*'), and an initialized one
+  * derives the name from the Core.XHTML and HTML.Strict settings.
+  * @param $config Instance of HTMLPurifier_Config
+  * @return String doctype name
+  */
+ function getDoctype($config) {
+     $explicit = $config->get('HTML', 'Doctype');
+     if ($explicit !== null) {
+         return $explicit;
+     }
+     if (!$this->initialized) {
+         // don't do HTML-oriented backwards compatibility stuff
+         // use either the auto-doctype, or the catch-all doctype
+         return $this->autoDoctype ? $this->autoDoctype : '*';
+     }
+     // this is backwards-compatibility stuff
+     $family = $config->get('Core', 'XHTML') ? 'XHTML 1.0' : 'HTML 4.01';
+     $flavour = $config->get('HTML', 'Strict') ? ' Strict' : ' Transitional';
+     return $family . $flavour;
+ }
+
+ /**
+  * Retrieves merged element definitions for all elements listed by the
+  * active modules.
+  * @note We may want to generate an elements array during setup
+  *       and pass that on, because a specific combination of
+  *       elements may trigger the loading of a module.
+  * @param $config Instance of HTMLPurifier_Config, for determining
+  *        stray elements.
+  * @return Array of element name => result of getElement()
+  */
+ function getElements($config) {
+     $merged = array();
+     foreach ($this->activeModules as $active) {
+         foreach ($active->elements as $element) {
+             // standalone elements are loaded here
+             $merged[$element] = $this->getElement($element, $config);
+         }
+     }
+     return $merged;
+ }
+
+ /**
+  * Retrieves a single merged element definition, built from all valid
+  * modules that define the element.
+  * @param $name Name of element
+  * @param $config Instance of HTMLPurifier_Config, may not be necessary.
+  * @return Merged definition, or false when the element is unknown or no
+  *         standalone definition was found
+  */
+ function getElement($name, $config) {
+
+     if (!isset($this->elementLookup[$name])) {
+         return false;
+     }
+
+     $def = false;
+     $modules = $this->validModules;
+
+     foreach ($this->elementLookup[$name] as $module_name) {
+
+         $module = $modules[$module_name];
+         $new_def = $module->info[$name];
+
+         if ($def) {
+             // a base definition exists already: fold this one into it
+             $def->mergeIn($new_def);
+         } elseif ($new_def->standalone) {
+             // first standalone definition becomes the base
+             $def = $new_def;
+         } else {
+             // could "save it for another day":
+             // non-standalone definitions that don't have a standalone
+             // to merge into could be deferred to the end
+             continue;
+         }
+
+         // attribute value expansions
+         $this->attrCollections->performInclusions($def->attr);
+         $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
+
+         // descendants_are_inline, for ChildDef_Chameleon
+         // (ins and del are deliberately left out)
+         $mentions_inline = is_string($def->content_model) &&
+             strpos($def->content_model, 'Inline') !== false;
+         if ($mentions_inline && $name != 'del' && $name != 'ins') {
+             $def->descendants_are_inline = true;
+         }
+
+         $this->contentSets->generateChildDef($def, $module);
+     }
+
+     return $def;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
new file mode 100644
index 0000000000..40ff2384bb
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
@@ -0,0 +1,42 @@
+ids[$id])) return false;
+ return $this->ids[$id] = true;
+ }
+
+ /**
+  * Marks every ID of a list as taken in the lookup table
+  * @param $array_of_ids Array of IDs to load
+  * @note Duplicates are not reported; an ID that is already present
+  *       simply stays present
+  */
+ function load($array_of_ids) {
+     foreach ($array_of_ids as $taken_id) {
+         $this->ids[$taken_id] = true;
+     }
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Language.php b/lib/htmlpurifier/HTMLPurifier/Language.php
new file mode 100644
index 0000000000..ca6fe03138
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Language.php
@@ -0,0 +1,56 @@
+_loaded) return;
+ $factory = HTMLPurifier_LanguageFactory::instance();
+ $factory->loadLanguage($this->code);
+ foreach ($factory->keys as $key) {
+ $this->$key = $factory->cache[$this->code][$key];
+ }
+ $this->_loaded = true;
+ }
+
+ /**
+  * Retrieves a localised message as stored, without further processing.
+  * The language data is loaded on first use.
+  * @param $key string identifier of message
+  * @return string localised message, or an empty string when the key
+  *         is unknown
+  */
+ function getMessage($key) {
+     if (!$this->_loaded) {
+         $this->load();
+     }
+     return isset($this->messages[$key]) ? $this->messages[$key] : '';
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
new file mode 100644
index 0000000000..303ba4bae0
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
@@ -0,0 +1,12 @@
+
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php
new file mode 100644
index 0000000000..115662bda9
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php
@@ -0,0 +1,11 @@
+ 'HTML Purifier X'
+);
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
new file mode 100644
index 0000000000..7650b81803
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
@@ -0,0 +1,12 @@
+ 'HTML Purifier',
+'pizza' => 'Pizza', // for unit testing purposes
+
+);
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
new file mode 100644
index 0000000000..7097ced767
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
@@ -0,0 +1,196 @@
+cache[$language_code][$key] = $value
+ * @value array map
+ */
+ var $cache;
+
+ /**
+ * Valid keys in the HTMLPurifier_Language object. Designates which
+ * variables to slurp out of a message file.
+ * @value array list
+ */
+ var $keys = array('fallback', 'messages');
+
+ /**
+ * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
+ * @value object HTMLPurifier_AttrDef_Lang
+ */
+ var $validator;
+
+ /**
+ * Cached copy of dirname(__FILE__), directory of current file without
+ * trailing slash
+ * @value string filename
+ */
+ var $dir;
+
+ /**
+ * Keys whose contents are a hash map and can be merged
+ * @value array lookup
+ */
+ var $mergeable_keys_map = array('messages' => true);
+
+ /**
+ * Keys whose contents are a list and can be merged
+ * @value array lookup
+ */
+ var $mergeable_keys_list = array();
+
+ /**
+  * Retrieve sole instance of the factory.
+  * @static
+  * @param $prototype Optional prototype to overload sole instance with,
+  *        or bool true to reset to default factory.
+  * @return Reference to the shared factory instance
+  */
+ function &instance($prototype = null) {
+     static $instance = null;
+     if ($prototype === true || ($prototype === null && $instance === null)) {
+         // Explicit reset, or first use: build and set up a default factory.
+         // The reset case has to be tested before the prototype case,
+         // otherwise the boolean itself would be stored as the instance.
+         $instance = new HTMLPurifier_LanguageFactory();
+         $instance->setup();
+     } elseif ($prototype !== null) {
+         // caller supplied a replacement factory
+         $instance = $prototype;
+     }
+     return $instance;
+ }
+
+ /**
+  * Initialises the factory, much like a constructor: remembers the
+  * directory of this file and creates the language code validator
+  * @note Prevents people from getting this outside of the singleton
+  */
+ function setup() {
+     $this->dir = dirname(__FILE__);
+     $this->validator = new HTMLPurifier_AttrDef_Lang();
+ }
+
+ /**
+  * Creates a language object, handles class fallbacks.
+  * A language without a dedicated class is represented by an object of
+  * its fallback language's class; the object's code is always set to the
+  * requested (validated) code.
+  * @param $code string language code; a malformed code becomes 'en'
+  * @return Language object, or false if even the base
+  *         HTMLPurifier_Language class is unavailable
+  */
+ function create($code) {
+
+     $config = $context = false; // hope it doesn't use these!
+     $code = $this->validator->validate($code, $config, $context);
+     if ($code === false) $code = 'en'; // malformed code becomes English
+
+     $pcode = str_replace('-', '_', $code); // make valid PHP classname
+     static $depth = 0; // recursion protection
+
+     if ($code == 'en') {
+         $class = 'HTMLPurifier_Language';
+     } else {
+         $class = 'HTMLPurifier_Language_' . $pcode;
+         $file = $this->dir . '/Language/classes/' . $code . '.php';
+         // PHP5/APC deps bug workaround can go here
+         // you can bypass the conditional include by loading the
+         // file yourself
+         if (file_exists($file) && !class_exists($class)) {
+             include_once $file;
+         }
+     }
+
+     if (class_exists($class)) {
+         $lang = new $class;
+     } else {
+         if ($code == 'en') {
+             // nothing left to fall back to; recursing would never end
+             trigger_error('HTMLPurifier_Language class is not loaded',
+                 E_USER_ERROR);
+             return false;
+         }
+         // go fallback: ask this factory (not a foreign class) for the
+         // fallback code and build that language instead
+         $fallback = $this->getFallbackFor($code);
+         if (!$fallback || $depth >= 10) {
+             // missing fallback, or a suspiciously long (possibly
+             // circular) fallback chain: settle for English
+             $fallback = 'en';
+         }
+         $depth++;
+         $lang = $this->create($fallback);
+         $depth--;
+         if ($lang === false) return false;
+     }
+     $lang->code = $code;
+
+     return $lang;
+
+ }
+
+ /**
+  * Looks up the fallback language of a language
+  * @note As a side effect the language itself is loaded into the cache
+  * @param $code string language code
+  * @return Value cached under the 'fallback' key for that language
+  */
+ function getFallbackFor($code) {
+     $this->loadLanguage($code);
+     $entry = $this->cache[$code];
+     return $entry['fallback'];
+ }
+
+ /**
+  * Loads language into the cache, handles message file and fallbacks.
+  * The message file is included in this method's scope, so the variables
+  * named in $this->keys ($fallback, $messages) that it assigns are
+  * collected with compact(). Missing keys are taken from the fallback
+  * language, and mergeable keys are merged with it.
+  * @param $code string language code
+  */
+ function loadLanguage($code) {
+     static $languages_seen = array(); // recursion guard
+
+     // abort if we've already loaded it
+     if (isset($this->cache[$code])) return;
+
+     // generate filename
+     $filename = $this->dir . '/Language/messages/' . $code . '.php';
+
+     // default fallback : may be overwritten by the ensuing include
+     // (do not rename: compact() below picks it up by name)
+     $fallback = ($code != 'en') ? 'en' : false;
+
+     // load primary localisation
+     if (!file_exists($filename)) {
+         // skip the include: will rely solely on fallback
+         $cache = array();
+     } else {
+         include $filename;
+         $cache = compact($this->keys);
+     }
+
+     // load fallback localisation
+     if (!empty($fallback)) {
+
+         // infinite recursion guard
+         if (isset($languages_seen[$code])) {
+             trigger_error('Circular fallback reference in language ' .
+                 $code, E_USER_ERROR);
+             $fallback = 'en';
+         }
+         // must write to the same array the check above reads, otherwise
+         // a circular fallback chain is never detected
+         $languages_seen[$code] = true;
+
+         // load the fallback recursively
+         $this->loadLanguage($fallback);
+         $fallback_cache = $this->cache[$fallback];
+
+         // merge fallback with current language
+         foreach ( $this->keys as $key ) {
+             if (isset($cache[$key]) && isset($fallback_cache[$key])) {
+                 if (isset($this->mergeable_keys_map[$key])) {
+                     // hash maps: entries of the current language win
+                     $cache[$key] = $cache[$key] + $fallback_cache[$key];
+                 } elseif (isset($this->mergeable_keys_list[$key])) {
+                     // lists: fallback entries first, then our own
+                     $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
+                 }
+             } else {
+                 $cache[$key] = $fallback_cache[$key];
+             }
+         }
+
+     }
+
+     // save to cache for later retrieval
+     $this->cache[$code] = $cache;
+
+     return;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer.php b/lib/htmlpurifier/HTMLPurifier/Lexer.php
new file mode 100644
index 0000000000..e7242e1e36
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php
@@ -0,0 +1,237 @@
+_entity_parser = new HTMLPurifier_EntityParser();
+ }
+
+
+ /**
+ * Most common entity to raw value conversion table for special entities.
+ * @protected
+ */
+    var $_special_entity2str =
+            array(
+                    // keys are the escaped spellings, values the raw characters
+                    '&quot;' => '"',
+                    '&amp;'  => '&',
+                    '&lt;'   => '<',
+                    '&gt;'   => '>',
+                    // the apostrophe has no named entity here, so the three
+                    // numeric spellings are listed instead
+                    '&#39;'  => "'",
+                    '&#039;' => "'",
+                    '&#x27;' => "'"
+            );
+
+ /**
+ * Parses special entities into the proper characters.
+ *
+ * This function translates escaped versions of the special characters
+ * into the characters they represent.
+ *
+ * @warning
+ * You should be able to treat the output of this function as
+ * completely parsed, but that's only because all other entities should
+ * have been handled previously in substituteNonSpecialEntities()
+ *
+ * @param $string String character data to be parsed.
+ * @returns Parsed character data.
+ */
+    function parseData($string) {
+
+        // following functions require at least one character
+        if ($string === '') return '';
+
+        // count the ampersands that could begin an entity: one followed by a
+        // space, or sitting at the very end of the string, cannot
+        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
+            ($string[strlen($string)-1] === '&' ? 1 : 0);
+
+        if (!$num_amp) return $string; // abort if no entities
+
+        // each "&amp;" is expected to decode to one bare "&" via the
+        // conversion table, so remember how many of those will legitimately
+        // remain after the translation below
+        $num_esc_amp = substr_count($string, '&amp;');
+        $string = strtr($string, $this->_special_entity2str);
+
+        // code duplication for sake of optimization, see above
+        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
+            ($string[strlen($string)-1] === '&' ? 1 : 0);
+
+        // every remaining candidate is accounted for by a decoded "&amp;":
+        // nothing else is left to parse
+        if ($num_amp_2 <= $num_esc_amp) return $string;
+
+        // hmm... now we have some uncommon entities. Use the callback.
+        return $this->_entity_parser->substituteSpecialEntities($string);
+    }
+
+ /**
+ * Lexes an HTML string into tokens.
+ *
+ * @param $string String HTML.
+ * @return HTMLPurifier_Token array representation of HTML.
+ */
+    function tokenizeHTML($string, $config, &$context) {
+        // placeholder body: reaching it raises a user-level error
+        $message = 'Call to abstract class';
+        trigger_error($message, E_USER_ERROR);
+    }
+
+ /**
+ * Retrieves or sets the default Lexer as a Prototype Factory.
+ *
+ * Depending on what PHP version you are running, the abstract base
+ * Lexer class will determine which concrete Lexer is best for you:
+ * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
+ * for PHP 5 and beyond.
+ *
+ * Passing the optional prototype lexer parameter will override the
+ * default with your own implementation. A copy/reference of the prototype
+ * lexer will now be returned when you request a new lexer.
+ *
+ * @static
+ *
+ * @note
+ * Though it is possible to call this factory method from subclasses,
+ * such usage is not recommended.
+ *
+ * @param $prototype Optional prototype lexer.
+ * @return Concrete lexer.
+ */
+    function create($prototype = null) {
+        // the chosen lexer persists across calls; whether it is held as a
+        // reference or a copy does not matter here
+        static $lexer = null;
+
+        // an explicitly supplied prototype always replaces the stored lexer
+        if ($prototype) $lexer = $prototype;
+
+        // something is already stored: no decision left to make
+        if (!empty($lexer)) return $lexer;
+
+        // DOMLex is picked only on PHP 5+ with the DOMDocument class present
+        $is_php5 = version_compare(PHP_VERSION, "5", ">=");
+        if ($is_php5 && class_exists('DOMDocument')) {
+            require_once 'HTMLPurifier/Lexer/DOMLex.php';
+            $lexer = new HTMLPurifier_Lexer_DOMLex();
+        } else {
+            require_once 'HTMLPurifier/Lexer/DirectLex.php';
+            $lexer = new HTMLPurifier_Lexer_DirectLex();
+        }
+
+        return $lexer;
+    }
+
+ /**
+ * Translates CDATA sections into regular sections (through escaping).
+ *
+ * @static
+ * @protected
+ * @param $string HTML string to process.
+ * @returns HTML with CDATA sections escaped.
+ */
+    function escapeCDATA($string) {
+        // Group 1 captures the contents of each CDATA section, which is what
+        // CDATACallback() escapes. The "s" modifier lets "." match newlines
+        // so that sections spanning several lines are converted as well.
+        return preg_replace_callback(
+            '/<!\[CDATA\[(.+?)\]\]>/s',
+            array('HTMLPurifier_Lexer', 'CDATACallback'),
+            $string
+        );
+    }
+
+ /**
+ * Callback function for escapeCDATA() that does the work.
+ *
+ * @static
+ * @warning Though this is public in order to let the callback happen,
+ * calling it directly is not recommended.
+ * @param $matches PCRE matches array, with index 0 the entire match
+ * and 1 the inside of the CDATA section.
+ * @returns Escaped internals of the CDATA section.
+ */
+    function CDATACallback($matches) {
+        // index 1 is the first capture group of the match array
+        $inner = $matches[1];
+
+        // ENT_COMPAT: double quotes are escaped, single quotes are left alone
+        return htmlspecialchars($inner, ENT_COMPAT, 'UTF-8');
+    }
+
+ /**
+ * Takes a piece of HTML and normalizes it by converting entities, fixing
+ * encoding, extracting bits, and other good stuff.
+ */
+    function normalize($html, $config, &$context) {
+
+        // when full documents are accepted, first narrow the input down
+        // with extractBody()
+        $accept_documents = $config->get('Core', 'AcceptFullDocuments');
+        if ($accept_documents) {
+            $html = $this->extractBody($html);
+        }
+
+        // escape CDATA sections, then expand every entity that isn't one
+        // of the big five
+        $html = $this->_entity_parser->substituteNonSpecialEntities(
+            $this->escapeCDATA($html)
+        );
+
+        // cleaning into a wellformed UTF-8 string for an SGML context comes
+        // last, because the entities expanded above sometimes represent
+        // non-SGML characters
+        return HTMLPurifier_Encoder::cleanUTF8($html);
+    }
+
+ /**
+ * Takes a string of HTML (fragment or document) and returns the content
+ */
+    function extractBody($html) {
+        // Capture everything between the opening <body ...> tag (attributes
+        // allowed) and the first closing </body> tag. "i" makes the tag
+        // names case-insensitive, "s" lets the contents span several lines.
+        $matches = array();
+        $result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
+
+        // a body element was found: return only what it contains
+        if ($result) return $matches[1];
+
+        // otherwise return the input untouched
+        return $html;
+    }
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
new file mode 100644
index 0000000000..9286b023d0
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
@@ -0,0 +1,152 @@
+factory = new HTMLPurifier_TokenFactory();
+ }
+
+ public function tokenizeHTML($string, $config, &$context) {
+
+ $string = $this->normalize($string, $config, $context);
+
+ // preprocess string, essential for UTF-8
+ $string =
+ ''.
+ ''.
+ ''.
+ 'to
+ foreach ($definition->info[$token->name]->attr_transform_pre
+ as $transform
+ ) {
+ $attr = $transform->transform($attr, $config, $context);
+ }
+
+ // create alias to this element's attribute definition array, see
+ // also $d_defs (global attribute definition array)
+ // DEFINITION CALL
+ $defs = $definition->info[$token->name]->attr;
+
+ // iterate through all the attribute keypairs
+ // Watch out for name collisions: $key has previously been used
+ foreach ($attr as $attr_key => $value) {
+
+ // call the definition
+ if ( isset($defs[$attr_key]) ) {
+ // there is a local definition defined
+ if ($defs[$attr_key] === false) {
+ // We've explicitly been told not to allow this element.
+ // This is usually when there's a global definition
+ // that must be overridden.
+ // Theoretically speaking, we could have a
+ // AttrDef_DenyAll, but this is faster!
+ $result = false;
+ } else {
+ // validate according to the element's definition
+ $result = $defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ }
+ } elseif ( isset($d_defs[$attr_key]) ) {
+ // there is a global definition defined, validate according
+ // to the global definition
+ $result = $d_defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ } else {
+ // system never heard of the attribute? DELETE!
+ $result = false;
+ }
+
+ // put the results into effect
+ if ($result === false || $result === null) {
+ // remove the attribute
+ unset($attr[$attr_key]);
+ } elseif (is_string($result)) {
+ // simple substitution
+ $attr[$attr_key] = $result;
+ }
+
+ // we'd also want slightly more complicated substitution
+ // involving an array as the return value,
+ // although we're not sure how colliding attributes would
+ // resolve (certain ones would be completely overridden,
+ // others would prepend themselves).
+ }
+
+ // post transforms
+
+ // ex.