From 89028eed59517f3eb3b1ce2c64fe987ab9ac3dc8 Mon Sep 17 00:00:00 2001 From: skodak <skodak> Date: Sun, 20 May 2007 05:35:43 +0000 Subject: [PATCH] MDL-9855 upgrade HTML Purifier to v1.6.1 --- lib/htmlpurifier/HTMLPurifier.php | 4 +- .../HTMLPurifier/AttrDef/Enum.php | 4 + .../HTMLPurifier/AttrDef/HTML/FrameTarget.php | 34 +++++++ .../HTMLPurifier/AttrTransform.php | 24 +++++ .../HTMLPurifier/AttrTransform/BgColor.php | 6 +- .../HTMLPurifier/AttrTransform/BoolToCSS.php | 39 ++++++++ .../HTMLPurifier/AttrTransform/Border.php | 14 +-- .../HTMLPurifier/AttrTransform/EnumToCSS.php | 60 ++++++++++++ .../HTMLPurifier/AttrTransform/ImgSpace.php | 47 +++++++++ .../HTMLPurifier/AttrTransform/Length.php | 8 +- .../HTMLPurifier/AttrTransform/Name.php | 16 +-- .../HTMLPurifier/CSSDefinition.php | 3 + .../HTMLPurifier/ConfigSchema.php | 4 + lib/htmlpurifier/HTMLPurifier/ElementDef.php | 2 +- .../HTMLPurifier/HTMLDefinition.php | 15 ++- .../HTMLPurifier/HTMLModule/Bdo.php | 1 - .../HTMLPurifier/HTMLModule/Edit.php | 1 - .../HTMLPurifier/HTMLModule/Hypertext.php | 1 - .../HTMLPurifier/HTMLModule/Image.php | 1 - .../HTMLPurifier/HTMLModule/List.php | 2 +- .../HTMLPurifier/HTMLModule/Presentation.php | 1 - .../HTMLPurifier/HTMLModule/Scripting.php | 67 +++++++++++++ .../HTMLPurifier/HTMLModule/Tables.php | 1 - .../HTMLPurifier/HTMLModule/Target.php | 26 +++++ .../HTMLPurifier/HTMLModule/Text.php | 2 - .../HTMLModule/TransformToStrict.php | 98 ++++++++++++++++++- .../HTMLModule/TransformToXHTML11.php | 6 ++ .../HTMLPurifier/HTMLModuleManager.php | 57 ++++++++--- .../HTMLPurifier/Lexer/DirectLex.php | 17 ++++ .../Strategy/RemoveForeignElements.php | 3 +- .../HTMLPurifier/TagTransform/Font.php | 13 ++- lib/htmlpurifier/readme_moodle.txt | 2 +- 32 files changed, 513 insertions(+), 66 deletions(-) create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php create mode 100644 
lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php index 5a0ce99d0b..3d538bca1a 100644 --- a/lib/htmlpurifier/HTMLPurifier.php +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -22,7 +22,7 @@ */ /* - HTML Purifier 1.6.0 - Standards Compliant HTML Filtering + HTML Purifier 1.6.1 - Standards Compliant HTML Filtering Copyright (C) 2006 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -64,7 +64,7 @@ require_once 'HTMLPurifier/Encoder.php'; class HTMLPurifier { - var $version = '1.6.0'; + var $version = '1.6.1'; var $config; var $filters; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php index 3246318f68..91a075f87a 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php @@ -5,6 +5,9 @@ require_once 'HTMLPurifier/AttrDef.php'; // Enum = Enumerated /** * Validates a keyword against a list of valid values. + * @warning The case-insensitive compare of this function uses PHP's + * built-in strtolower and ctype_lower functions, which may + * cause problems with international comparisons */ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef { @@ -34,6 +37,7 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef function validate($string, $config, &$context) { $string = trim($string); if (!$this->case_sensitive) { + // we may want to do full case-insensitive libraries $string = ctype_lower($string) ? 
$string : strtolower($string); } $result = isset($this->valid_values[$string]); diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php new file mode 100644 index 0000000000..5893bbfa0a --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -0,0 +1,34 @@ +<?php + +HTMLPurifier_ConfigSchema::define( + 'Attr', 'AllowedFrameTargets', array(), 'lookup', + 'Lookup table of all allowed link frame targets. Some commonly used '. + 'link targets include _blank, _self, _parent and _top. Values should '. + 'be lowercase, as validation will be done in a case-sensitive manner '. + 'despite W3C\'s recommendation. XHTML 1.0 Strict does not permit '. + 'the target attribute so this directive will have no effect in that '. + 'doctype. XHTML 1.1 does not enable the Target module by default, you '. + 'will have to manually enable it (see the module documentation for more details.)' +); + +require_once 'HTMLPurifier/AttrDef/Enum.php'; + +/** + * Special-case enum attribute definition that lazy loads allowed frame targets + */ +class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum +{ + + var $valid_values = false; // uninitialized value + var $case_sensitive = false; + + function HTMLPurifier_AttrDef_HTML_FrameTarget() {} + + function validate($string, $config, &$context) { + if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets'); + return parent::validate($string, $config, $context); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php index 3513669ae1..2fa07b4755 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php @@ -29,6 +29,30 @@ class HTMLPurifier_AttrTransform function transform($attr, $config, &$context) { trigger_error('Cannot call abstract 
function', E_USER_ERROR); } + + /** + * Prepends CSS properties to the style attribute, creating the + * attribute if it doesn't exist. + * @param $attr Attribute array to process (passed by reference) + * @param $css CSS to prepend + */ + function prependCSS(&$attr, $css) { + $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; + $attr['style'] = $css . $attr['style']; + } + + /** + * Retrieves and removes an attribute + * @param $attr Attribute array to process (passed by reference) + * @param $key Key of attribute to confiscate + */ + function confiscateAttr(&$attr, $key) { + if (!isset($attr[$key])) return null; + $value = $attr[$key]; + unset($attr[$key]); + return $value; + } + } ?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php index abfd03427d..a7bb2b4564 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php @@ -12,12 +12,10 @@ extends HTMLPurifier_AttrTransform { if (!isset($attr['bgcolor'])) return $attr; - $bgcolor = $attr['bgcolor']; - unset($attr['bgcolor']); + $bgcolor = $this->confiscateAttr($attr, 'bgcolor'); // some validation should happen here - $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; - $attr['style'] = "background-color:$bgcolor;" . 
$attr['style']; + $this->prependCSS($attr, "background-color:$bgcolor;"); return $attr; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php new file mode 100644 index 0000000000..f4a16a7f17 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BoolToCSS.php @@ -0,0 +1,39 @@ +<?php + +require_once 'HTMLPurifier/AttrTransform.php'; + +/** + * Pre-transform that changes converts a boolean attribute to fixed CSS + */ +class HTMLPurifier_AttrTransform_BoolToCSS +extends HTMLPurifier_AttrTransform { + + /** + * Name of boolean attribute that is trigger + */ + var $attr; + + /** + * CSS declarations to add to style, needs trailing semicolon + */ + var $css; + + /** + * @param $attr string attribute name to convert from + * @param $css string CSS declarations to add to style (needs semicolon) + */ + function HTMLPurifier_AttrTransform_BoolToCSS($attr, $css) { + $this->attr = $attr; + $this->css = $css; + } + + function transform($attr, $config, &$context) { + if (!isset($attr[$this->attr])) return $attr; + unset($attr[$this->attr]); + $this->prependCSS($attr, $this->css); + return $attr; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php index 0b745d3045..10c62e3c5b 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php @@ -5,22 +5,14 @@ require_once 'HTMLPurifier/AttrTransform.php'; /** * Pre-transform that changes deprecated border attribute to CSS. 
*/ -class HTMLPurifier_AttrTransform_Border -extends HTMLPurifier_AttrTransform { +class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform { function transform($attr, $config, &$context) { - if (!isset($attr['border'])) return $attr; - - $border_width = $attr['border']; - unset($attr['border']); + $border_width = $this->confiscateAttr($attr, 'border'); // some validation should happen here - - $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; - $attr['style'] = "border:{$border_width}px solid;" . $attr['style']; - + $this->prependCSS($attr, "border:{$border_width}px solid;"); return $attr; - } } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php new file mode 100644 index 0000000000..ed4dfc32dd --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/EnumToCSS.php @@ -0,0 +1,60 @@ +<?php + +require_once 'HTMLPurifier/AttrTransform.php'; + +/** + * Generic pre-transform that converts an attribute with a fixed number of + * values (enumerated) to CSS. + */ +class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform { + + /** + * Name of attribute to transform from + */ + var $attr; + + /** + * Lookup array of attribute values to CSS + */ + var $enumToCSS = array(); + + /** + * Case sensitivity of the matching + * @warning Currently can only be guaranteed to work with ASCII + * values. 
+ */ + var $caseSensitive = false; + + /** + * @param $attr String attribute name to transform from + * @param $enumToCSS Lookup array of attribute values to CSS + * @param $case_sensitive Boolean case sensitivity indicator, default false + */ + function HTMLPurifier_AttrTransform_EnumToCSS($attr, $enum_to_css, $case_sensitive = false) { + $this->attr = $attr; + $this->enumToCSS = $enum_to_css; + $this->caseSensitive = (bool) $case_sensitive; + } + + function transform($attr, $config, &$context) { + + if (!isset($attr[$this->attr])) return $attr; + + $value = trim($attr[$this->attr]); + unset($attr[$this->attr]); + + if (!$this->caseSensitive) $value = strtolower($value); + + if (!isset($this->enumToCSS[$value])) { + return $attr; + } + + $this->prependCSS($attr, $this->enumToCSS[$value]); + + return $attr; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php new file mode 100644 index 0000000000..53c787e2c9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgSpace.php @@ -0,0 +1,47 @@ +<?php + +require_once 'HTMLPurifier/AttrTransform.php'; + +/** + * Pre-transform that changes deprecated hspace and vspace attributes to CSS + */ +class HTMLPurifier_AttrTransform_ImgSpace +extends HTMLPurifier_AttrTransform { + + var $attr; + var $css = array( + 'hspace' => array('left', 'right'), + 'vspace' => array('top', 'bottom') + ); + + function HTMLPurifier_AttrTransform_ImgSpace($attr) { + $this->attr = $attr; + if (!isset($this->css[$attr])) { + trigger_error(htmlspecialchars($attr) . 
' is not valid space attribute'); + } + } + + function transform($attr, $config, &$context) { + + if (!isset($attr[$this->attr])) return $attr; + + $width = $this->confiscateAttr($attr, $this->attr); + // some validation could happen here + + if (!isset($this->css[$this->attr])) return $attr; + + $style = ''; + foreach ($this->css[$this->attr] as $suffix) { + $property = "margin-$suffix"; + $style .= "$property:{$width}px;"; + } + + $this->prependCSS($attr, $style); + + return $attr; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php index 16d3d1d8ca..2292aa133e 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php @@ -18,13 +18,9 @@ class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform function transform($attr, $config, &$context) { if (!isset($attr[$this->name])) return $attr; - $length = $attr[$this->name]; - unset($attr[$this->name]); + $length = $this->confiscateAttr($attr, $this->name); if(ctype_digit($length)) $length .= 'px'; - - $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; - $attr['style'] = $this->cssName . ":$length;" . $attr['style']; - + $this->prependCSS($attr, $this->cssName . 
":$length;"); return $attr; } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php index 0f815b69e3..f14c147989 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php @@ -9,21 +9,11 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform { function transform($attr, $config, &$context) { - if (!isset($attr['name'])) return $attr; - - $name = $attr['name']; - unset($attr['name']); - - if (isset($attr['id'])) { - // ID already set, discard name - return $attr; - } - - $attr['id'] = $name; - + $id = $this->confiscateAttr($attr, 'name'); + if ( isset($attr['id'])) return $attr; + $attr['id'] = $id; return $attr; - } } diff --git a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php index 5de49b69b3..23a66ab76a 100644 --- a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php @@ -206,6 +206,9 @@ class HTMLPurifier_CSSDefinition new HTMLPurifier_AttrDef_CSS_Percentage() )); + // partial support + $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap')); + } } diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php index 9f1f3e3eb3..940e8e6199 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php @@ -334,6 +334,10 @@ class HTMLPurifier_ConfigSchema { case 'hash': case 'lookup': if (is_string($var)) { + // special case: technically, this is an array with + // a single empty string item, but having an empty + // array is more intuitive + if ($var == '') return array(); // simplistic string to array method that only works // for simple lists of tag names or alphanumeric characters $var = explode(',',$var); diff --git a/lib/htmlpurifier/HTMLPurifier/ElementDef.php 
b/lib/htmlpurifier/HTMLPurifier/ElementDef.php index 21bc5f36a3..73c94abe13 100644 --- a/lib/htmlpurifier/HTMLPurifier/ElementDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php @@ -95,7 +95,7 @@ class HTMLPurifier_ElementDef // later keys takes precedence foreach($def->attr as $k => $v) { - if ($k == 0) { + if ($k === 0) { // merge in the includes // sorry, no way to override an include foreach ($v as $v2) { diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php index 3af445ceb0..c1dd6535c4 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -183,9 +183,18 @@ class HTMLPurifier_HTMLDefinition $this->manager->setup($this->config); foreach ($this->manager->activeModules as $module) { - foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; - foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; - foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; + foreach($module->info_tag_transform as $k => $v) { + if ($v === false) unset($this->info_tag_transform[$k]); + else $this->info_tag_transform[$k] = $v; + } + foreach($module->info_attr_transform_pre as $k => $v) { + if ($v === false) unset($this->info_attr_transform_pre[$k]); + else $this->info_attr_transform_pre[$k] = $v; + } + foreach($module->info_attr_transform_post as $k => $v) { + if ($v === false) unset($this->info_attr_transform_post[$k]); + else $this->info_attr_transform_post[$k] = $v; + } } $this->info = $this->manager->getElements($this->config); diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php index 17e5e987fd..6feae0050d 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php @@ -12,7 +12,6 @@ class HTMLPurifier_HTMLModule_Bdo extends 
HTMLPurifier_HTMLModule var $name = 'Bdo'; var $elements = array('bdo'); - var $info = array(); var $content_sets = array('Inline' => 'bdo'); var $attr_collections = array( 'I18N' => array('dir' => false) diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php index 6a415906e6..c3dc019700 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php @@ -12,7 +12,6 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule var $name = 'Edit'; var $elements = array('del', 'ins'); - var $info = array(); var $content_sets = array('Inline' => 'del | ins'); function HTMLPurifier_HTMLModule_Edit() { diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php index e285e8ba1f..baa20fd14b 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php @@ -11,7 +11,6 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule var $name = 'Hypertext'; var $elements = array('a'); - var $info = array(); var $content_sets = array('Inline' => 'a'); function HTMLPurifier_HTMLModule_Hypertext() { diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php index 3852836de7..bf234b1372 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php @@ -15,7 +15,6 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule var $name = 'Image'; var $elements = array('img'); - var $info = array(); var $content_sets = array('Inline' => 'img'); function HTMLPurifier_HTMLModule_Image() { diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php index c74982df4e..f9f2c4e21f 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php +++ 
b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php @@ -10,7 +10,7 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule var $name = 'List'; var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li'); - var $info = array(); + // According to the abstract schema, the List content set is a fully formed // one or more expr, but it invariably occurs in an optional declaration // so we're not going to do that subtlety. It might cause trouble diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php index 42d9c11e46..5c80db407b 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php @@ -17,7 +17,6 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule var $name = 'Presentation'; var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt'); - var $info = array(); var $content_sets = array( 'Block' => 'hr', 'Inline' => 'b | big | i | small | sub | sup | tt' diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php new file mode 100644 index 0000000000..e3ef802bf4 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Scripting.php @@ -0,0 +1,67 @@ +<?php + +/* + +WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING +INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!! 
+ +Usage: + +require_once 'HTMLPurifier/HTMLModule/Scripting.php'; +$def =& $config->getHTMLDefinition(true); // get the raw version +$def->manager->addModule('Scripting'); + +This must come before any other calls to getHTMLDefinition() + +*/ + +/** + * Implements required attribute stipulation for <script> + */ +class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform +{ + function transform($attr, $config, &$context) { + if (!isset($attr['type'])) { + $attr['type'] = 'text/javascript'; + } + return $attr; + } +} + +/** + * XHTML 1.1 Scripting module, defines elements that are used to contain + * information pertaining to executable scripts or the lack of support + * for executable scripts. + * @note This module does not contain inline scripting elements + */ +class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule +{ + var $name = 'Scripting'; + var $elements = array('script', 'noscript'); + var $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript'); + + function HTMLPurifier_HTMLModule_Scripting() { + // TODO: create custom child-definition for noscript that + // auto-wraps stray #PCDATA in a similar manner to + // blockquote's custom definition (we would use it but + // blockquote's contents are optional while noscript's contents + // are required) + foreach ($this->elements as $element) { + $this->info[$element] = new HTMLPurifier_ElementDef(); + } + $this->info['noscript']->attr = array( 0 => array('Common') ); + $this->info['noscript']->content_model = 'Heading | List | Block'; + $this->info['noscript']->content_model_type = 'required'; + $this->info['script']->attr = array( + 'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')), + 'src' => new HTMLPurifier_AttrDef_URI(true), + 'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript')) + ); + $this->info['script']->content_model = '#PCDATA'; + $this->info['script']->content_model_type = 'optional'; + 
$this->info['script']->attr_transform_post['type'] = + new HTMLPurifier_AttrTransform_ScriptRequired(); + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php index ea41f5b103..003ff62487 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php @@ -12,7 +12,6 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule var $name = 'Tables'; var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col', 'colgroup', 'tbody', 'thead', 'tfoot'); - var $info = array(); var $content_sets = array('Block' => 'table'); function HTMLPurifier_HTMLModule_Tables() { diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php new file mode 100644 index 0000000000..1c2104bae8 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Target.php @@ -0,0 +1,26 @@ +<?php + +require_once 'HTMLPurifier/AttrDef/HTML/FrameTarget.php'; + +/** + * XHTML 1.1 Target Module, defines target attribute in link elements. 
+ */ +class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule +{ + + var $name = 'Target'; + var $elements = array('a'); + + function HTMLPurifier_HTMLModule_Target() { + foreach ($this->elements as $e) { + $this->info[$e] = new HTMLPurifier_ElementDef(); + $this->info[$e]->standalone = false; + $this->info[$e]->attr = array( + 'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget() + ); + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php index bac05986c6..6f81dcf389 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php @@ -22,8 +22,6 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule 'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong', 'var', 'nolink', 'tex', 'algebra'); //moodle modification - var $info = array(); - var $content_sets = array( 'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6', 'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php index cdbe3733f2..0b6c8370ab 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php @@ -7,11 +7,13 @@ require_once 'HTMLPurifier/TagTransform/Center.php'; require_once 'HTMLPurifier/TagTransform/Font.php'; require_once 'HTMLPurifier/AttrTransform/Lang.php'; -require_once 'HTMLPurifier/AttrTransform/TextAlign.php'; require_once 'HTMLPurifier/AttrTransform/BgColor.php'; +require_once 'HTMLPurifier/AttrTransform/BoolToCSS.php'; require_once 'HTMLPurifier/AttrTransform/Border.php'; require_once 'HTMLPurifier/AttrTransform/Name.php'; require_once 'HTMLPurifier/AttrTransform/Length.php'; +require_once 'HTMLPurifier/AttrTransform/ImgSpace.php'; 
+require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php'; /** * Proprietary module that transforms deprecated elements into Strict @@ -25,7 +27,8 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule // we're actually modifying these elements, not defining them var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', - 'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr'); + 'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr', 'br', + 'caption', 'ul', 'ol', 'li'); var $info_tag_transform = array( // placeholders, see constructor for definitions @@ -47,6 +50,13 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule function HTMLPurifier_HTMLModule_TransformToStrict() { + // behavior with transformations when there's another CSS property + // working on it is interesting: the CSS will *always* override + // the deprecated attribute, whereas an inline CSS declaration will + // override the corresponding declaration in, say, an external + // stylesheet. This behavior won't affect most people, but it + // does represent an operational difference we CANNOT fix. 
+ // deprecated tag transforms $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); @@ -59,6 +69,11 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule } // deprecated attribute transforms + + // align battery + $align_lookup = array(); + $align_values = array('left', 'right', 'center', 'justify'); + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; $this->info['h1']->attr_transform_pre['align'] = $this->info['h2']->attr_transform_pre['align'] = $this->info['h3']->attr_transform_pre['align'] = @@ -66,7 +81,7 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule $this->info['h5']->attr_transform_pre['align'] = $this->info['h6']->attr_transform_pre['align'] = $this->info['p'] ->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_TextAlign(); + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); // xml:lang <=> lang mirroring, implement in TransformToStrict, // this is overridden in TransformToXHTML11 @@ -92,9 +107,86 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule $this->info['th']->attr_transform_pre['width'] = $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width'); + $this->info['td']->attr_transform_pre['nowrap'] = + $this->info['th']->attr_transform_pre['nowrap'] = new HTMLPurifier_AttrTransform_BoolToCSS('nowrap', 'white-space:nowrap;'); + $this->info['td']->attr_transform_pre['height'] = $this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height'); + $this->info['img']->attr_transform_pre['hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); + $this->info['img']->attr_transform_pre['vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); + + $this->info['hr']->attr_transform_pre['size'] = new HTMLPurifier_AttrTransform_Length('size', 
'height'); + + // this transformation is not precise but often good enough. + // different browsers use different styles to designate noshade + $this->info['hr']->attr_transform_pre['noshade'] = new HTMLPurifier_AttrTransform_BoolToCSS('noshade', 'color:#808080;background-color:#808080;border: 0;'); + + $this->info['br']->attr_transform_pre['clear'] = + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( + 'left' => 'clear:left;', + 'right' => 'clear:right;', + 'all' => 'clear:both;', + 'none' => 'clear:none;', + )); + + // this is a slightly unreasonable attribute + $this->info['caption']->attr_transform_pre['align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we're following IE's behavior, not Firefox's, due + // to the fact that no one supports caption-side:right, + // W3C included (with CSS 2.1) + 'left' => 'text-align:left;', + 'right' => 'text-align:right;', + 'top' => 'caption-side:top;', + 'bottom' => 'caption-side:bottom;' // not supported by IE + )); + + $this->info['table']->attr_transform_pre['align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'center' => 'margin-left:auto;margin-right:auto;', + 'right' => 'float:right;' + )); + + $this->info['img']->attr_transform_pre['align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'right' => 'float:right;', + 'top' => 'vertical-align:top;', + 'middle' => 'vertical-align:middle;', + 'bottom' => 'vertical-align:baseline;', + )); + + $this->info['hr']->attr_transform_pre['align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', + 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' + )); + + $ul_types = array( + 'disc' => 'list-style-type:disc;', + 'square' => 'list-style-type:square;', + 'circle' => 'list-style-type:circle;' + ); + $ol_types = 
array( + '1' => 'list-style-type:decimal;', + 'i' => 'list-style-type:lower-roman;', + 'I' => 'list-style-type:upper-roman;', + 'a' => 'list-style-type:lower-alpha;', + 'A' => 'list-style-type:upper-alpha;' + ); + $li_types = $ul_types + $ol_types; + + $this->info['ul']->attr_transform_pre['type'] = + new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); + $this->info['ol']->attr_transform_pre['type'] = + new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); + $this->info['li']->attr_transform_pre['type'] = + new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); + + } var $defines_child_def = true; diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php index 0915f5b6e5..68aac61312 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php @@ -1,5 +1,7 @@ <?php +require_once 'HTMLPurifier/AttrTransform/Lang.php'; + /** * Proprietary module that transforms XHTML 1.0 deprecated aspects into * XHTML 1.1 compliant ones, when possible. 
For maximum effectiveness, @@ -25,6 +27,10 @@ class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule 'lang' => false // remove it ); + function HTMLPurifier_HTMLModule_TransformToXHTML11() { + $this->info_attr_transform_pre['lang'] = new HTMLPurifier_AttrTransform_Lang(); + } + } ?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php index e0090472ca..81ef13a5f4 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php @@ -22,6 +22,7 @@ require_once 'HTMLPurifier/HTMLModule/Tables.php'; require_once 'HTMLPurifier/HTMLModule/Image.php'; require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php'; +require_once 'HTMLPurifier/HTMLModule/Target.php'; // proprietary modules require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; @@ -134,6 +135,7 @@ class HTMLPurifier_HTMLModuleManager 'CommonAttributes', 'Text', 'Hypertext', 'List', 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', + 'Target', // define-redefine 'Legacy', // redefine @@ -155,7 +157,7 @@ class HTMLPurifier_HTMLModuleManager 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')), // XHTML definitions - 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ), + 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy', 'Target' ), 'XHTML 1.0 Strict' => array(array('_Common')), 'XHTML 1.1' => array(array('_Common')), ); @@ -206,20 +208,35 @@ class HTMLPurifier_HTMLModuleManager * @param $module Mixed: string module name, with or without * HTMLPurifier_HTMLModule prefix, or instance of * subclass of HTMLPurifier_HTMLModule. + * @note This function will not call autoload, you must instantiate + * (and thus invoke) autoload outside the method. 
+ * @note If a string is passed as a module name, different variants + * will be tested in this order: + * - Check for HTMLPurifier_HTMLModule_$name + * - Check all prefixes with $name in order they were added + * - Check for literal object name + * - Throw fatal error + * If your object name collides with an internal class, specify + * your module manually. */ function addModule($module) { if (is_string($module)) { $original_module = $module; - if (!class_exists($module)) { - foreach ($this->prefixes as $prefix) { - $module = $prefix . $original_module; - if (class_exists($module)) break; + $ok = false; + foreach ($this->prefixes as $prefix) { + $module = $prefix . $original_module; + if ($this->_classExists($module)) { + $ok = true; + break; } } - if (!class_exists($module)) { - trigger_error($original_module . ' module does not exist', - E_USER_ERROR); - return; + if (!$ok) { + $module = $original_module; + if (!$this->_classExists($module)) { + trigger_error($original_module . ' module does not exist', + E_USER_ERROR); + return; + } } $module = new $module(); } @@ -230,6 +247,23 @@ class HTMLPurifier_HTMLModuleManager } } + /** + * Safely tests for class existence without invoking __autoload in PHP5 + * @param $name String class name to test + * @private + */ + function _classExists($name) { + static $is_php_4 = null; + if ($is_php_4 === null) { + $is_php_4 = version_compare(PHP_VERSION, '5', '<'); + } + if ($is_php_4) { + return class_exists($name); + } else { + return class_exists($name, false); + } + } + /** * Makes a collection active, while also making it valid if not * already done so. See $activeModules for the semantics of "active". 
@@ -491,7 +525,8 @@ class HTMLPurifier_HTMLModuleManager $elements = array(); foreach ($this->activeModules as $module) { - foreach ($module->elements as $name) { + foreach ($module->info as $name => $v) { + if (isset($elements[$name])) continue; $elements[$name] = $this->getElement($name, $config); } } @@ -555,4 +590,4 @@ class HTMLPurifier_HTMLModuleManager } -?> \ No newline at end of file +?> diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php index 65d95a7cf9..57d116a4fb 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php @@ -110,6 +110,23 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer continue; } + // Check leading character is alnum, if not, we may + // have accidentally grabbed an emoticon. Translate into + // text and go our merry way + if (!ctype_alnum($segment[0])) { + $array[] = new + HTMLPurifier_Token_Text( + '<' . + $this->parseData( + $segment + ) . + '>' + ); + $cursor = $position_next_gt + 1; + $inside_tag = false; + continue; + } + // Check if it is explicitly self closing, if so, remove // trailing slash. 
Remember, we could have a tag like <br>, so // any later token processing scripts must convert improperly diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php index 27caf3645f..cb5c4dd1b3 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -29,6 +29,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $generator = new HTMLPurifier_Generator(); $result = array(); $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg'); foreach($tokens as $token) { if (!empty( $token->is_tag )) { // DEFINITION CALL @@ -37,7 +38,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // hard-coded image special case, pre-emptively drop // if not available. Probably not abstract-able - if ( $token->name == 'img' ) { + if ( $token->name == 'img' && $remove_invalid_img ) { if (!isset($token->attr['src'])) { continue; } diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php index ae6d783809..dedaf8b245 100644 --- a/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php @@ -20,6 +20,7 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform var $transform_to = 'span'; var $_size_lookup = array( + '0' => 'xx-small', '1' => 'xx-small', '2' => 'small', '3' => 'medium', @@ -28,9 +29,10 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform '6' => 'xx-large', '7' => '300%', '-1' => 'smaller', - '+1' => 'larger', '-2' => '60%', + '+1' => 'larger', '+2' => '150%', + '+3' => '200%', '+4' => '300%' ); @@ -58,6 +60,15 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform // handle size transform if 
(isset($attr['size'])) { + // normalize large numbers + if ($attr['size']{0} == '+' || $attr['size']{0} == '-') { + $size = (int) $attr['size']; + if ($size < -2) $attr['size'] = '-2'; + if ($size > 4) $attr['size'] = '+4'; + } else { + $size = (int) $attr['size']; + if ($size > 7) $attr['size'] = '7'; + } if (isset($this->_size_lookup[$attr['size']])) { $prepend_style .= 'font-size:' . $this->_size_lookup[$attr['size']] . ';'; diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt index 53cf1070b1..34af69a1c1 100644 --- a/lib/htmlpurifier/readme_moodle.txt +++ b/lib/htmlpurifier/readme_moodle.txt @@ -1,4 +1,4 @@ -Description of HTML Purifier v1.6.0 library import into Moodle +Description of HTML Purifier v1.6.1 library import into Moodle Changes: * Text.php - added nolink, tex and algebra tags -- 2.39.5