From e0ac8448c79df889c91e2bb454fd1a9dafbcdd53 Mon Sep 17 00:00:00 2001 From: skodak Date: Wed, 18 Apr 2007 21:52:03 +0000 Subject: [PATCH] MDL-9151 HTML Purifier cleaning support - enable switch is in experimental section MDL-9435 Reviewved url cleaning in redirect() --- admin/settings/misc.php | 1 + lang/en_utf8/admin.php | 2 + lang/en_utf8/docs/credits.html | 11 + lib/htmlpurifier/CREDITS | 7 + lib/htmlpurifier/HTMLPurifier.auto.php | 10 + lib/htmlpurifier/HTMLPurifier.func.php | 21 + lib/htmlpurifier/HTMLPurifier.php | 170 ++++++ .../HTMLPurifier/AttrCollections.php | 100 ++++ lib/htmlpurifier/HTMLPurifier/AttrDef.php | 67 +++ lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php | 69 +++ .../HTMLPurifier/AttrDef/CSS/Background.php | 87 +++ .../AttrDef/CSS/BackgroundPosition.php | 130 ++++ .../HTMLPurifier/AttrDef/CSS/Border.php | 45 ++ .../HTMLPurifier/AttrDef/CSS/Color.php | 97 +++ .../HTMLPurifier/AttrDef/CSS/Composite.php | 38 ++ .../HTMLPurifier/AttrDef/CSS/Font.php | 154 +++++ .../HTMLPurifier/AttrDef/CSS/FontFamily.php | 66 +++ .../HTMLPurifier/AttrDef/CSS/Length.php | 56 ++ .../HTMLPurifier/AttrDef/CSS/ListStyle.php | 80 +++ .../HTMLPurifier/AttrDef/CSS/Multiple.php | 58 ++ .../HTMLPurifier/AttrDef/CSS/Number.php | 61 ++ .../HTMLPurifier/AttrDef/CSS/Percentage.php | 43 ++ .../AttrDef/CSS/TextDecoration.php | 41 ++ .../HTMLPurifier/AttrDef/CSS/URI.php | 58 ++ .../HTMLPurifier/AttrDef/Enum.php | 46 ++ .../HTMLPurifier/AttrDef/HTML/ID.php | 121 ++++ .../HTMLPurifier/AttrDef/HTML/Length.php | 44 ++ .../HTMLPurifier/AttrDef/HTML/LinkTypes.php | 75 +++ .../HTMLPurifier/AttrDef/HTML/MultiLength.php | 44 ++ .../HTMLPurifier/AttrDef/HTML/Nmtokens.php | 51 ++ .../HTMLPurifier/AttrDef/HTML/Pixels.php | 37 ++ .../HTMLPurifier/AttrDef/Integer.php | 75 +++ .../HTMLPurifier/AttrDef/Lang.php | 75 +++ .../HTMLPurifier/AttrDef/Text.php | 17 + lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php | 296 ++++++++++ .../HTMLPurifier/AttrDef/URI/Email.php | 17 + 
.../AttrDef/URI/Email/SimpleCheck.php | 23 + .../HTMLPurifier/AttrDef/URI/Host.php | 54 ++ .../HTMLPurifier/AttrDef/URI/IPv4.php | 36 ++ .../HTMLPurifier/AttrDef/URI/IPv6.php | 99 ++++ .../HTMLPurifier/AttrTransform.php | 34 ++ .../HTMLPurifier/AttrTransform/BdoDir.php | 31 + .../HTMLPurifier/AttrTransform/BgColor.php | 28 + .../HTMLPurifier/AttrTransform/Border.php | 28 + .../AttrTransform/ImgRequired.php | 50 ++ .../HTMLPurifier/AttrTransform/Lang.php | 30 + .../HTMLPurifier/AttrTransform/Length.php | 33 ++ .../HTMLPurifier/AttrTransform/Name.php | 31 + .../HTMLPurifier/AttrTransform/TextAlign.php | 36 ++ lib/htmlpurifier/HTMLPurifier/AttrTypes.php | 41 ++ .../HTMLPurifier/CSSDefinition.php | 213 +++++++ lib/htmlpurifier/HTMLPurifier/ChildDef.php | 55 ++ .../HTMLPurifier/ChildDef/Chameleon.php | 51 ++ .../HTMLPurifier/ChildDef/Custom.php | 75 +++ .../HTMLPurifier/ChildDef/Empty.php | 22 + .../HTMLPurifier/ChildDef/Optional.php | 23 + .../HTMLPurifier/ChildDef/Required.php | 107 ++++ .../ChildDef/StrictBlockquote.php | 76 +++ .../HTMLPurifier/ChildDef/Table.php | 142 +++++ lib/htmlpurifier/HTMLPurifier/Config.php | 222 +++++++ lib/htmlpurifier/HTMLPurifier/ConfigDef.php | 10 + .../HTMLPurifier/ConfigDef/Directive.php | 74 +++ .../HTMLPurifier/ConfigDef/DirectiveAlias.php | 27 + .../HTMLPurifier/ConfigDef/Namespace.php | 23 + .../HTMLPurifier/ConfigSchema.php | 386 ++++++++++++ lib/htmlpurifier/HTMLPurifier/ContentSets.php | 148 +++++ lib/htmlpurifier/HTMLPurifier/Context.php | 76 +++ lib/htmlpurifier/HTMLPurifier/ElementDef.php | 122 ++++ lib/htmlpurifier/HTMLPurifier/Encoder.php | 403 +++++++++++++ .../HTMLPurifier/EntityLookup.php | 46 ++ .../HTMLPurifier/EntityLookup/entities.ser | 1 + .../HTMLPurifier/EntityParser.php | 158 +++++ lib/htmlpurifier/HTMLPurifier/Error.php | 8 + lib/htmlpurifier/HTMLPurifier/Filter.php | 39 ++ .../HTMLPurifier/Filter/YouTube.php | 34 ++ lib/htmlpurifier/HTMLPurifier/Generator.php | 158 +++++ .../HTMLPurifier/HTMLDefinition.php | 
281 +++++++++ lib/htmlpurifier/HTMLPurifier/HTMLModule.php | 125 ++++ .../HTMLPurifier/HTMLModule/Bdo.php | 43 ++ .../HTMLModule/CommonAttributes.php | 31 + .../HTMLPurifier/HTMLModule/Edit.php | 46 ++ .../HTMLPurifier/HTMLModule/Hypertext.php | 37 ++ .../HTMLPurifier/HTMLModule/Image.php | 38 ++ .../HTMLPurifier/HTMLModule/Legacy.php | 60 ++ .../HTMLPurifier/HTMLModule/List.php | 46 ++ .../HTMLPurifier/HTMLModule/Presentation.php | 41 ++ .../HTMLModule/StyleAttribute.php | 27 + .../HTMLPurifier/HTMLModule/Tables.php | 88 +++ .../HTMLPurifier/HTMLModule/Text.php | 78 +++ .../HTMLModule/TransformToStrict.php | 108 ++++ .../HTMLModule/TransformToXHTML11.php | 30 + .../HTMLPurifier/HTMLModuleManager.php | 558 ++++++++++++++++++ .../HTMLPurifier/IDAccumulator.php | 42 ++ lib/htmlpurifier/HTMLPurifier/Language.php | 56 ++ .../Language/classes/en-x-test.php | 12 + .../Language/messages/en-x-test.php | 11 + .../HTMLPurifier/Language/messages/en.php | 12 + .../HTMLPurifier/LanguageFactory.php | 196 ++++++ lib/htmlpurifier/HTMLPurifier/Lexer.php | 237 ++++++++ .../HTMLPurifier/Lexer/DOMLex.php | 152 +++++ .../HTMLPurifier/Lexer/DirectLex.php | 309 ++++++++++ .../HTMLPurifier/Lexer/PEARSax3.php | 110 ++++ .../HTMLPurifier/PercentEncoder.php | 47 ++ lib/htmlpurifier/HTMLPurifier/Printer.php | 149 +++++ .../HTMLPurifier/Printer/CSSDefinition.php | 40 ++ .../HTMLPurifier/Printer/HTMLDefinition.php | 210 +++++++ lib/htmlpurifier/HTMLPurifier/Strategy.php | 33 ++ .../HTMLPurifier/Strategy/Composite.php | 30 + .../HTMLPurifier/Strategy/Core.php | 25 + .../HTMLPurifier/Strategy/FixNesting.php | 292 +++++++++ .../HTMLPurifier/Strategy/MakeWellFormed.php | 158 +++++ .../Strategy/RemoveForeignElements.php | 86 +++ .../Strategy/ValidateAttributes.php | 129 ++++ .../HTMLPurifier/TagTransform.php | 29 + .../HTMLPurifier/TagTransform/Center.php | 34 ++ .../HTMLPurifier/TagTransform/Font.php | 83 +++ .../HTMLPurifier/TagTransform/Simple.php | 26 + lib/htmlpurifier/HTMLPurifier/Token.php | 
168 ++++++ .../HTMLPurifier/TokenFactory.php | 96 +++ lib/htmlpurifier/HTMLPurifier/URIScheme.php | 44 ++ .../HTMLPurifier/URIScheme/ftp.php | 45 ++ .../HTMLPurifier/URIScheme/http.php | 24 + .../HTMLPurifier/URIScheme/https.php | 14 + .../HTMLPurifier/URIScheme/mailto.php | 30 + .../HTMLPurifier/URIScheme/news.php | 24 + .../HTMLPurifier/URIScheme/nntp.php | 24 + .../HTMLPurifier/URISchemeRegistry.php | 104 ++++ lib/htmlpurifier/readme_moodle.txt | 8 + lib/weblib.php | 60 +- 129 files changed, 10389 insertions(+), 20 deletions(-) create mode 100644 lib/htmlpurifier/CREDITS create mode 100644 lib/htmlpurifier/HTMLPurifier.auto.php create mode 100644 lib/htmlpurifier/HTMLPurifier.func.php create mode 100644 lib/htmlpurifier/HTMLPurifier.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrCollections.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php create mode 
100644 lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php create mode 100644 lib/htmlpurifier/HTMLPurifier/AttrTypes.php create mode 100644 lib/htmlpurifier/HTMLPurifier/CSSDefinition.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef.php create mode 
100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Config.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ConfigDef.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ConfigSchema.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ContentSets.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Context.php create mode 100644 lib/htmlpurifier/HTMLPurifier/ElementDef.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Encoder.php create mode 100644 lib/htmlpurifier/HTMLPurifier/EntityLookup.php create mode 100644 lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser create mode 100644 lib/htmlpurifier/HTMLPurifier/EntityParser.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Error.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Filter.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Generator.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php create mode 100644 
lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php create mode 100644 lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php create mode 100644 lib/htmlpurifier/HTMLPurifier/IDAccumulator.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Language.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Language/messages/en.php create mode 100644 lib/htmlpurifier/HTMLPurifier/LanguageFactory.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Lexer.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php create mode 100644 lib/htmlpurifier/HTMLPurifier/PercentEncoder.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Printer.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy/Core.php create mode 100644 
lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php create mode 100644 lib/htmlpurifier/HTMLPurifier/TagTransform.php create mode 100644 lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php create mode 100644 lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php create mode 100644 lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php create mode 100644 lib/htmlpurifier/HTMLPurifier/Token.php create mode 100644 lib/htmlpurifier/HTMLPurifier/TokenFactory.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/http.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/https.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/news.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php create mode 100644 lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php create mode 100644 lib/htmlpurifier/readme_moodle.txt diff --git a/admin/settings/misc.php b/admin/settings/misc.php index 117dcbb291..d910603816 100644 --- a/admin/settings/misc.php +++ b/admin/settings/misc.php @@ -7,6 +7,7 @@ $temp = new admin_settingpage('experimental', get_string('experimental', 'admin' $temp->add(new admin_setting_configcheckbox('enableajax', get_string('enableajax', 'admin'), get_string('configenableajax', 'admin'), 0)); $temp->add(new admin_setting_configcheckbox('enableglobalsearch', get_string('enableglobalsearch', 'admin'), get_string('configenableglobalsearch', 'admin'), 0)); $temp->add(new admin_setting_configcheckbox('smartpix', get_string('smartpix', 'admin'), get_string('configsmartpix', 'admin'), 0)); 
+$temp->add(new admin_setting_configcheckbox('enablehtmlpurifier', get_string('enablehtmlpurifier', 'admin'), get_string('configenablehtmlpurifier', 'admin'), 0)); $ADMIN->add('misc', $temp); // XMLDB editor diff --git a/lang/en_utf8/admin.php b/lang/en_utf8/admin.php index f79bab02b3..d75adb3ca6 100644 --- a/lang/en_utf8/admin.php +++ b/lang/en_utf8/admin.php @@ -87,6 +87,7 @@ $string['configeditordictionary'] = 'This value will be used if aspell doesn\'t $string['configenableajax'] = 'This setting allows you to control the use of AJAX (advanced client/server interfaces using Javascript) across the whole site. With this setting enabled users can sill make a choice in their profile, otherwise AJAX is disabled for everybody.'; $string['configenablecourserequests'] = 'This will allow any user to request a course be created.'; $string['configenableglobalsearch'] = 'This setting enables global text searching in resources and activities, it is not compatible with PHP 4.'; +$string['configenablehtmlpurifier'] = 'Use HTML Purifier instead of KSES for cleaning of untrusted text. HTML Purifier is actively developed and is believed to be more secure, but it is more resource intensive. Expect minor visual differences in the resulting html code. Please note that embed and object tags can not be enabled, MathML tags and old lang tags are not supported. '; $string['configenablerssfeeds'] = 'This switch will enable RSS feeds from across the site. To actually see any change you will need to enable RSS feeds in the individual modules too - go to the Modules settings under Admin Configuration.'; $string['configenablerssfeedsdisabled'] = 'It is not available because RSS feeds are disabled in all the Site. To enable them, go to the Variables settings under Admin Configuration.'; $string['configenablestats'] = 'If you choose \'yes\' here, Moodle\'s cronjob will process the logs and gather some statistics. Depending on the amount of traffic on your site, this can take awhile. 
If you enable this, you will be able to see some interesting graphs and statistics about each of your courses, or on a sitewide basis.'; @@ -261,6 +262,7 @@ $string['editstrings'] = 'Edit words or phrases'; $string['enableajax'] = 'Enable AJAX'; $string['enablecourserequests'] = 'Enable course requests'; $string['enableglobalsearch'] = 'Enable global search'; +$string['enablehtmlpurifier'] = 'Enable HTML Purifier'; $string['enablerecordcache'] = 'Enable Record Cache'; $string['enablerssfeeds'] = 'Enable RSS feeds'; $string['enablestats'] = 'Enable statistics'; diff --git a/lang/en_utf8/docs/credits.html b/lang/en_utf8/docs/credits.html index 05fe0eae45..dee954aef1 100644 --- a/lang/en_utf8/docs/credits.html +++ b/lang/en_utf8/docs/credits.html @@ -278,6 +278,17 @@ URL: http://typo3.org/

+ +

HTML Purifier - lib/htmlpurifier

+
Standards-compliant HTML filter library.
+
+ CVS version: 1.60
+ Copyright (C) 2006 Edward Z. Yang
+ License: GNU LGPL
+ URL: http://hp.jpsband.org/
+
+

Moodle Documentation

diff --git a/lib/htmlpurifier/CREDITS b/lib/htmlpurifier/CREDITS new file mode 100644 index 0000000000..c3e7bb8e2f --- /dev/null +++ b/lib/htmlpurifier/CREDITS @@ -0,0 +1,7 @@ + +CREDITS + +Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks +to the DevNetwork Community for their help (see docs/ref-devnetwork.html for +more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake +for letting me package his fantastic XSS cheatsheet for a smoketest. diff --git a/lib/htmlpurifier/HTMLPurifier.auto.php b/lib/htmlpurifier/HTMLPurifier.auto.php new file mode 100644 index 0000000000..a66fd2e25d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.auto.php @@ -0,0 +1,10 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.func.php b/lib/htmlpurifier/HTMLPurifier.func.php new file mode 100644 index 0000000000..876ad7b298 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.func.php @@ -0,0 +1,21 @@ +purify($html, $config); +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php new file mode 100644 index 0000000000..5a0ce99d0b --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -0,0 +1,170 @@ +config = HTMLPurifier_Config::create($config); + + $this->lexer = HTMLPurifier_Lexer::create(); + $this->strategy = new HTMLPurifier_Strategy_Core(); + $this->generator = new HTMLPurifier_Generator(); + + } + + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + function addFilter($filter) { + $this->filters[] = $filter; + } + + /** + * Filters an HTML snippet/document to be XSS-free and standards-compliant. + * + * @param $html String of HTML to purify + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. 
The parameter can also be any type + * that HTMLPurifier_Config::create() supports. + * @return Purified HTML + */ + function purify($html, $config = null) { + + $config = $config ? HTMLPurifier_Config::create($config) : $this->config; + + $context = new HTMLPurifier_Context(); + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + + for ($i = 0, $size = count($this->filters); $i < $size; $i++) { + $html = $this->filters[$i]->preFilter($html, $config, $context); + } + + // purified HTML + $html = + $this->generator->generateFromTokens( + // list of tokens + $this->strategy->execute( + // list of un-purified tokens + $this->lexer->tokenizeHTML( + // un-purified HTML + $html, $config, $context + ), + $config, $context + ), + $config, $context + ); + + for ($i = $size - 1; $i >= 0; $i--) { + $html = $this->filters[$i]->postFilter($html, $config, $context); + } + + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); + $this->context =& $context; + return $html; + } + + /** + * Filters an array of HTML snippets + * @param $config Optional HTMLPurifier_Config object for this operation. + * See HTMLPurifier::purify() for more details. 
+ * @return Array of purified HTML + */ + function purifyArray($array_of_html, $config = null) { + $context_array = array(); + foreach ($array_of_html as $key => $html) { + $array_of_html[$key] = $this->purify($html, $config); + $context_array[$key] = $this->context; + } + $this->context = $context_array; + return $array_of_html; + } + + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrCollections.php b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php new file mode 100644 index 0000000000..8318abb15c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php @@ -0,0 +1,100 @@ +info; + // load extensions from the modules + foreach ($modules as $module) { + foreach ($module->attr_collections as $coll_i => $coll) { + foreach ($coll as $attr_i => $attr) { + if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) { + // merge in includes + $info[$coll_i][$attr_i] = array_merge( + $info[$coll_i][$attr_i], $attr); + continue; + } + $info[$coll_i][$attr_i] = $attr; + } + } + } + // perform internal expansions and inclusions + foreach ($info as $name => $attr) { + // merge attribute collections that include others + $this->performInclusions($info[$name]); + // replace string identifiers with actual attribute objects + $this->expandIdentifiers($info[$name], $attr_types); + } + } + + /** + * Takes a reference to an attribute associative array and performs + * all inclusions specified by the zero index. 
+ * @param &$attr Reference to attribute array + */ + function performInclusions(&$attr) { + if (!isset($attr[0])) return; + $merge = $attr[0]; + $seen = array(); // recursion guard against cyclic inclusions + // loop through all the inclusions + for ($i = 0; isset($merge[$i]); $i++) { + if (isset($seen[$merge[$i]])) continue; + $seen[$merge[$i]] = true; + // skip inclusions that name an unknown collection + if (!isset($this->info[$merge[$i]])) continue; + // foreach attribute of the inclusion, copy it over + foreach ($this->info[$merge[$i]] as $key => $value) { + if (isset($attr[$key])) continue; // also catches more inclusions + $attr[$key] = $value; + } + if (isset($this->info[$merge[$i]][0])) { + // recursion: queue the included collection's own inclusions + $merge = array_merge($merge, $this->info[$merge[$i]][0]); + } + } + unset($attr[0]); + } + + /** + * Expands all string identifiers in an attribute array by replacing + * them with the appropriate values inside HTMLPurifier_AttrTypes + * @param &$attr Reference to attribute array + * @param $attr_types HTMLPurifier_AttrTypes instance + */ + function expandIdentifiers(&$attr, $attr_types) { + foreach ($attr as $def_i => $def) { + if ($def_i === 0) continue; + if ($def === false) { + // false marks an attribute for removal; test it before the string filter + unset($attr[$def_i]); + continue; + } + if (!is_string($def)) continue; + if (isset($attr_types->info[$def])) { + $attr[$def_i] = $attr_types->info[$def]; + } else { + trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR); + unset($attr[$def_i]); + } + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef.php b/lib/htmlpurifier/HTMLPurifier/AttrDef.php new file mode 100644 index 0000000000..334a7acedd --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef.php @@ -0,0 +1,67 @@ + by removing + * leading and trailing whitespace, ignoring line feeds, and replacing + * carriage returns and tabs with spaces. While most useful for HTML + * attributes specified as CDATA, it can also be applied to most CSS + * values. + * + * @note This method is not entirely standards compliant, as trim() removes + * more types of whitespace than specified in the spec. 
In practice, + * this is rarely a problem, as those extra characters usually have + * already been removed by HTMLPurifier_Encoder. + * + * @warning This processing is inconsistent with XML's whitespace handling + * as specified by section 3.3.3 and referenced XHTML 1.0 section + * 4.7. Compliant processing requires all line breaks normalized + * to "\n", so the fix is not as simple as fixing it in this + * function. Trim and whitespace collapsing are supposed to only + * occur in NMTOKENs. However, note that we are NOT necessarily + * parsing XML, thus, this behavior may still be correct. + * + * @public + */ + function parseCDATA($string) { + $string = trim($string); + $string = str_replace("\n", '', $string); + $string = str_replace(array("\r", "\t"), ' ', $string); + return $string; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php new file mode 100644 index 0000000000..220ec0d0d1 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php @@ -0,0 +1,69 @@ +parseCDATA($css); + + $definition = $config->getCSSDefinition(); + + // we're going to break the spec and explode by semicolons. 
+ // This is because semicolon rarely appears in escaped form + // Doing this is generally flaky but fast + // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI + // for details + + $declarations = explode(';', $css); + $propvalues = array(); + + foreach ($declarations as $declaration) { + if (!$declaration) continue; + if (!strpos($declaration, ':')) continue; + list($property, $value) = explode(':', $declaration, 2); + $property = trim($property); + $value = trim($value); + if (!isset($definition->info[$property])) continue; + // inefficient call, since the validator will do this again + if (strtolower(trim($value)) !== 'inherit') { + // inherit works for everything (but only on the base property) + $result = $definition->info[$property]->validate( + $value, $config, $context ); + } else { + $result = 'inherit'; + } + if ($result === false) continue; + $propvalues[$property] = $result; + } + + // procedure does not write the new CSS simultaneously, so it's + // slightly inefficient, but it's the only way of getting rid of + // duplicates. Perhaps config to optimize it, but not now. + + $new_declarations = ''; + foreach ($propvalues as $prop => $value) { + $new_declarations .= "$prop:$value;"; + } + + return $new_declarations ? 
$new_declarations : false; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php new file mode 100644 index 0000000000..42d8bcf0e6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php @@ -0,0 +1,87 @@ +getCSSDefinition(); + $this->info['background-color'] = $def->info['background-color']; + $this->info['background-image'] = $def->info['background-image']; + $this->info['background-repeat'] = $def->info['background-repeat']; + $this->info['background-attachment'] = $def->info['background-attachment']; + $this->info['background-position'] = $def->info['background-position']; + } + + function validate($string, $config, &$context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['color'] = false; + $caught['image'] = false; + $caught['repeat'] = false; + $caught['attachment'] = false; + $caught['position'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($key != 'position') { + if ($status !== false) continue; + $r = $this->info['background-' . $key]->validate($bit, $config, $context); + } else { + $r = $bit; + } + if ($r === false) continue; + if ($key == 'position') { + if ($caught[$key] === false) $caught[$key] = ''; + $caught[$key] .= $r . 
' '; + } else { + $caught[$key] = $r; + } + $i++; + break; + } + } + + if (!$i) return false; + if ($caught['position'] !== false) { + $caught['position'] = $this->info['background-position']-> + validate($caught['position'], $config, $context); + } + + $ret = array(); + foreach ($caught as $value) { + if ($value === false) continue; + $ret[] = $value; + } + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php new file mode 100644 index 0000000000..77a3ddd6e3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -0,0 +1,130 @@ + | | left | center | right + ] + [ + | | top | center | bottom + ]? + ] | + [ // this signifies that the vertical and horizontal adjectives + // can be arbitrarily ordered, however, there can only be two, + // one of each, or none at all + [ + left | center | right + ] || + [ + top | center | bottom + ] + ] + top, left = 0% + center, (none) = 50% + bottom, right = 100% +*/ + +/* QuirksMode says: + keyword + length/percentage must be ordered correctly, as per W3C + + Internet Explorer and Opera, however, support arbitrary ordering. We + should fix it up. + + Minor issue though, not strictly necessary. +*/ + +// control freaks may appreciate the ability to convert these to +// percentages or something, but it's not necessary + +/** + * Validates the value of background-position. 
+ */ +class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef +{ + + var $length; + var $percentage; + + function HTMLPurifier_AttrDef_CSS_BackgroundPosition() { + $this->length = new HTMLPurifier_AttrDef_CSS_Length(); + $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); + } + + function validate($string, $config, &$context) { + $string = $this->parseCDATA($string); + $bits = explode(' ', $string); + + $keywords = array(); + $keywords['h'] = false; // left, right + $keywords['v'] = false; // top, bottom + $keywords['c'] = false; // center + $measures = array(); + + $i = 0; + + $lookup = array( + 'top' => 'v', + 'bottom' => 'v', + 'left' => 'h', + 'right' => 'h', + 'center' => 'c' + ); + + foreach ($bits as $bit) { + if ($bit === '') continue; + + // test for keyword + $lbit = ctype_lower($bit) ? $bit : strtolower($bit); + if (isset($lookup[$lbit])) { + $status = $lookup[$lbit]; + $keywords[$status] = $lbit; + $i++; + } + + // test for length + $r = $this->length->validate($bit, $config, $context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + // test for percentage + $r = $this->percentage->validate($bit, $config, $context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + } + + if (!$i) return false; // no valid values were caught + + + $ret = array(); + + // first keyword + if ($keywords['h']) $ret[] = $keywords['h']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) { + $ret[] = $keywords['c']; + $keywords['c'] = false; // prevent re-use: center = center center + } + + if ($keywords['v']) $ret[] = $keywords['v']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) $ret[] = $keywords['c']; + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php new file mode 100644 
index 0000000000..583f14fd09 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php @@ -0,0 +1,45 @@ +getCSSDefinition(); + $this->info['border-width'] = $def->info['border-width']; + $this->info['border-style'] = $def->info['border-style']; + $this->info['border-top-color'] = $def->info['border-top-color']; + } + + function validate($string, $config, &$context) { + $string = $this->parseCDATA($string); + // we specifically will not support rgb() syntax with spaces + $bits = explode(' ', $string); + $done = array(); // segments we've finished + $ret = ''; // return value + foreach ($bits as $bit) { + foreach ($this->info as $propname => $validator) { + if (isset($done[$propname])) continue; + $r = $validator->validate($bit, $config, $context); + if ($r !== false) { + $ret .= $r . ' '; + $done[$propname] = true; + break; + } + } + } + return rtrim($ret); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php new file mode 100644 index 0000000000..4e6a78acf8 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php @@ -0,0 +1,97 @@ + '#800000', + 'red' => '#F00', + 'orange' => '#FFA500', + 'yellow' => '#FF0', + 'olive' => '#808000', + 'purple' => '#800080', + 'fuchsia' => '#F0F', + 'white' => '#FFF', + 'lime' => '#0F0', + 'green' => '#008000', + 'navy' => '#000080', + 'blue' => '#00F', + 'aqua' => '#0FF', + 'teal' => '#008080', + 'black' => '#000', + 'silver' => '#C0C0C0', + 'gray' => '#808080' + ); + + function validate($color, $config, &$context) { + + $color = trim($color); + if (!$color) return false; + + $lower = strtolower($color); + if (isset($this->colors[$lower])) return $this->colors[$lower]; + + if ($color[0] === '#') { + // hexadecimal handling + $hex = substr($color, 1); + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + } else { + // rgb literal 
handling + if (strpos($color, 'rgb(')) return false; + $length = strlen($color); + if (strpos($color, ')') !== $length - 1) return false; + $triad = substr($color, 4, $length - 4 - 1); + $parts = explode(',', $triad); + if (count($parts) !== 3) return false; + $type = false; // to ensure that they're all the same type + $new_parts = array(); + foreach ($parts as $part) { + $part = trim($part); + if ($part === '') return false; + $length = strlen($part); + if ($part[$length - 1] === '%') { + // handle percents + if (!$type) { + $type = 'percentage'; + } elseif ($type !== 'percentage') { + return false; + } + $num = (float) substr($part, 0, $length - 1); + if ($num < 0) $num = 0; + if ($num > 100) $num = 100; + $new_parts[] = "$num%"; + } else { + // handle integers + if (!$type) { + $type = 'integer'; + } elseif ($type !== 'integer') { + return false; + } + $num = (int) $part; + if ($num < 0) $num = 0; + if ($num > 255) $num = 255; + $new_parts[] = (string) $num; + } + } + $new_triad = implode(',', $new_parts); + $color = "rgb($new_triad)"; + } + + return $color; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php new file mode 100644 index 0000000000..9d2803d26c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php @@ -0,0 +1,38 @@ +defs = $defs; + } + + function validate($string, $config, &$context) { + foreach ($this->defs as $i => $def) { + $result = $this->defs[$i]->validate($string, $config, $context); + if ($result !== false) return $result; + } + return false; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php new file mode 100644 index 0000000000..1b3b090503 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php @@ -0,0 +1,154 @@ + true, + 'icon' => true, + 'menu' => true, + 'message-box' => 
true, + 'small-caption' => true, + 'status-bar' => true + ); + + function HTMLPurifier_AttrDef_CSS_Font($config) { + $def = $config->getCSSDefinition(); + $this->info['font-style'] = $def->info['font-style']; + $this->info['font-variant'] = $def->info['font-variant']; + $this->info['font-weight'] = $def->info['font-weight']; + $this->info['font-size'] = $def->info['font-size']; + $this->info['line-height'] = $def->info['line-height']; + $this->info['font-family'] = $def->info['font-family']; + } + + function validate($string, $config, &$context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // check if it's one of the keywords + $lowercase_string = strtolower($string); + if (isset($this->system_fonts[$lowercase_string])) { + return $lowercase_string; + } + + $bits = explode(' ', $string); // bits to process + $stage = 0; // this indicates what we're looking for + $caught = array(); // which stage 0 properties have we caught? + $stage_1 = array('font-style', 'font-variant', 'font-weight'); + $final = ''; // output + + for ($i = 0, $size = count($bits); $i < $size; $i++) { + if ($bits[$i] === '') continue; + switch ($stage) { + + // attempting to catch font-style, font-variant or font-weight + case 0: + foreach ($stage_1 as $validator_name) { + if (isset($caught[$validator_name])) continue; + $r = $this->info[$validator_name]->validate( + $bits[$i], $config, $context); + if ($r !== false) { + $final .= $r . ' '; + $caught[$validator_name] = true; + break; + } + } + // all three caught, continue on + if (count($caught) >= 3) $stage = 1; + if ($r !== false) break; + + // attempting to catch font-size and perhaps line-height + case 1: + $found_slash = false; + if (strpos($bits[$i], '/') !== false) { + list($font_size, $line_height) = + explode('/', $bits[$i]); + if ($line_height === '') { + // ooh, there's a space after the slash! 
+ $line_height = false; + $found_slash = true; + } + } else { + $font_size = $bits[$i]; + $line_height = false; + } + $r = $this->info['font-size']->validate( + $font_size, $config, $context); + if ($r !== false) { + $final .= $r; + // attempt to catch line-height + if ($line_height === false) { + // we need to scroll forward + for ($j = $i + 1; $j < $size; $j++) { + if ($bits[$j] === '') continue; + if ($bits[$j] === '/') { + if ($found_slash) { + return false; + } else { + $found_slash = true; + continue; + } + } + $line_height = $bits[$j]; + break; + } + } else { + // slash already found + $found_slash = true; + $j = $i; + } + if ($found_slash) { + $i = $j; + $r = $this->info['line-height']->validate( + $line_height, $config, $context); + if ($r !== false) { + $final .= '/' . $r; + } + } + $final .= ' '; + $stage = 2; + break; + } + return false; + + // attempting to catch font-family + case 2: + $font_family = + implode(' ', array_slice($bits, $i, $size - $i)); + $r = $this->info['font-family']->validate( + $font_family, $config, $context); + if ($r !== false) { + $final .= $r . ' '; + // processing completed successfully + return rtrim($final); + } + return false; + } + } + return false; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php new file mode 100644 index 0000000000..15cbbf3995 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -0,0 +1,66 @@ + true, + 'sans-serif' => true, + 'monospace' => true, + 'fantasy' => true, + 'cursive' => true + ); + + function validate($string, $config, &$context) { + $string = $this->parseCDATA($string); + // assume that no font names contain commas in them + $fonts = explode(',', $string); + $final = ''; + foreach($fonts as $font) { + $font = trim($font); + if ($font === '') continue; + // match a generic name + if (isset($this->generic_names[$font])) { + $final .= $font . 
', '; + continue; + } + // match a quoted name + if ($font[0] === '"' || $font[0] === "'") { + $length = strlen($font); + if ($length <= 2) continue; + $quote = $font[0]; + if ($font[$length - 1] !== $quote) continue; + $font = substr($font, 1, $length - 2); + } + // process font + if (ctype_alnum($font)) { + // very simple font, allow it in unharmed + $final .= $font . ', '; + continue; + } + $nospace = str_replace(array(' ', '.', '!'), '', $font); + if (ctype_alnum($nospace)) { + // font with spaces in it + $final .= "'$font', "; + continue; + } + } + $final = rtrim($final, ', '); + if ($final === '') return false; + return $final; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php new file mode 100644 index 0000000000..7da26a8f6b --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php @@ -0,0 +1,56 @@ + true, 'ex' => true, 'px' => true, 'in' => true, + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true); + /** + * Instance of HTMLPurifier_AttrDef_Number to defer number validation to + */ + var $number_def; + + /** + * @param $non_negative Bool indication whether or not negative values are + * allowed. + */ + function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) { + $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); + } + + function validate($length, $config, &$context) { + + $length = $this->parseCDATA($length); + if ($length === '') return false; + if ($length === '0') return '0'; + $strlen = strlen($length); + if ($strlen === 1) return false; // impossible! 
+ + // we assume all units are two characters + $unit = substr($length, $strlen - 2); + if (!ctype_lower($unit)) $unit = strtolower($unit); + $number = substr($length, 0, $strlen - 2); + + if (!isset($this->units[$unit])) return false; + + $number = $this->number_def->validate($number, $config, $context); + if ($number === false) return false; + + return $number . $unit; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php new file mode 100644 index 0000000000..2d2ed12da6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php @@ -0,0 +1,80 @@ +getCSSDefinition(); + $this->info['list-style-type'] = $def->info['list-style-type']; + $this->info['list-style-position'] = $def->info['list-style-position']; + $this->info['list-style-image'] = $def->info['list-style-image']; + } + + function validate($string, $config, &$context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['type'] = false; + $caught['position'] = false; + $caught['image'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($i >= 3) return; // optimization bit + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($status !== false) continue; + $r = $this->info['list-style-' . 
$key]->validate($bit, $config, $context); + if ($r === false) continue; + if ($r === 'none') { + if ($none) continue; + else $none = true; + if ($key == 'image') continue; + } + $caught[$key] = $r; + $i++; + break; + } + } + + if (!$i) return false; + + $ret = array(); + + // construct type + if ($caught['type']) $ret[] = $caught['type']; + + // construct image + if ($caught['image']) $ret[] = $caught['image']; + + // construct position + if ($caught['position']) $ret[] = $caught['position']; + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php new file mode 100644 index 0000000000..0d1c840615 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php @@ -0,0 +1,58 @@ +single = $single; + $this->max = $max; + } + + function validate($string, $config, &$context) { + $string = $this->parseCDATA($string); + if ($string === '') return false; + $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n + $length = count($parts); + $final = ''; + for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { + if (ctype_space($parts[$i])) continue; + $result = $this->single->validate($parts[$i], $config, $context); + if ($result !== false) { + $final .= $result . 
' '; + $num++; + } + } + if ($final === '') return false; + return rtrim($final); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php new file mode 100644 index 0000000000..48f1335ac8 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php @@ -0,0 +1,61 @@ +non_negative = $non_negative; + } + + function validate($number, $config, &$context) { + + $number = $this->parseCDATA($number); + + if ($number === '') return false; + + $sign = ''; + switch ($number[0]) { + case '-': + if ($this->non_negative) return false; + $sign = '-'; + case '+': + $number = substr($number, 1); + } + + if (ctype_digit($number)) { + $number = ltrim($number, '0'); + return $number ? $sign . $number : '0'; + } + if (!strpos($number, '.')) return false; + + list($left, $right) = explode('.', $number, 2); + + if (!ctype_digit($left)) return false; + $left = ltrim($left, '0'); + + $right = rtrim($right, '0'); + + if ($right === '') { + return $left ? $sign . $left : '0'; + } elseif (!ctype_digit($right)) { + return false; + } + + return $sign . $left . '.' . 
$right; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php new file mode 100644 index 0000000000..cc96f15d8c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php @@ -0,0 +1,43 @@ +number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); + } + + function validate($string, $config, &$context) { + + $string = $this->parseCDATA($string); + + if ($string === '') return false; + $length = strlen($string); + if ($length === 1) return false; + if ($string[$length - 1] !== '%') return false; + + $number = substr($string, 0, $length - 1); + $number = $this->number_def->validate($number, $config, $context); + + if ($number === false) return false; + return "$number%"; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php new file mode 100644 index 0000000000..294dd83077 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php @@ -0,0 +1,41 @@ + true, + 'overline' => true, + 'underline' => true + ); + + function validate($string, $config, &$context) { + + $string = strtolower($this->parseCDATA($string)); + $parts = explode(' ', $string); + $final = ''; + foreach ($parts as $part) { + if (isset($this->allowed_values[$part])) { + $final .= $part . 
' '; + } + } + $final = rtrim($final); + if ($final === '') return false; + return $final; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php new file mode 100644 index 0000000000..b310907cd5 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php @@ -0,0 +1,58 @@ +HTMLPurifier_AttrDef_URI(true); // always embedded + } + + function validate($uri_string, $config, &$context) { + // parse the URI out of the string and then pass it onto + // the parent object + + $uri_string = $this->parseCDATA($uri_string); + if (strpos($uri_string, 'url(') !== 0) return false; + $uri_string = substr($uri_string, 4); + $new_length = strlen($uri_string) - 1; + if ($uri_string[$new_length] != ')') return false; + $uri = trim(substr($uri_string, 0, $new_length)); + + if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) { + $quote = $uri[0]; + $new_length = strlen($uri) - 1; + if ($uri[$new_length] !== $quote) return false; + $uri = substr($uri, 1, $new_length - 1); + } + + $keys = array( '(', ')', ',', ' ', '"', "'"); + $values = array('\$', '\$', '\\,', '\\ ', '\\"', "\\'"); + $uri = str_replace($values, $keys, $uri); + + $result = parent::validate($uri, $config, $context); + + if ($result === false) return false; + + // escape necessary characters according to CSS spec + // except for the comma, none of these should appear in the + // URI at all + $result = str_replace($keys, $values, $result); + + return "url($result)"; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php new file mode 100644 index 0000000000..3246318f68 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php @@ -0,0 +1,46 @@ +valid_values = array_flip($valid_values); + $this->case_sensitive = $case_sensitive; + } + + function validate($string, $config, &$context) { + $string = 
trim($string); + if (!$this->case_sensitive) { + $string = ctype_lower($string) ? $string : strtolower($string); + } + $result = isset($this->valid_values[$string]); + + return $result ? $string : false; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php new file mode 100644 index 0000000000..c8bf29913c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php @@ -0,0 +1,121 @@ +get('Attr', 'EnableID')) return false; + + $id = trim($id); // trim it first + + if ($id === '') return false; + + $prefix = $config->get('Attr', 'IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr', 'IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) $id = $prefix . $id; + } elseif ($config->get('Attr', 'IDPrefixLocal') !== '') { + trigger_error('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set', E_USER_WARNING); + } + + //if (!$this->ref) { + $id_accumulator =& $context->get('IDAccumulator'); + if (isset($id_accumulator->ids[$id])) return false; + //} + + // we purposely avoid using regex, hopefully this is faster + + if (ctype_alpha($id)) { + $result = true; + } else { + if (!ctype_alpha(@$id[0])) return false; + $trim = trim( // primitive style of regexps, I suppose + $id, + 'A..Za..z0..9:-._' + ); + $result = ($trim === ''); + } + + $regexp = $config->get('Attr', 'IDBlacklistRegexp'); + if ($regexp && preg_match($regexp, $id)) { + return false; + } + + if (/*!$this->ref && */$result) $id_accumulator->add($id); + + // if no change was made to the ID, return the result + // else, return the new id if stripping whitespace made it + // valid, or return false. + return $result ? 
$id : false; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php new file mode 100644 index 0000000000..ac83295a03 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php @@ -0,0 +1,44 @@ + 100) return '100%'; + + return ((string) $points) . '%'; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php new file mode 100644 index 0000000000..94a47ba92e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -0,0 +1,75 @@ + 'AllowedRel', + 'rev' => 'AllowedRev' + ); + + /** Name config attribute to pull. */ + var $name; + + function HTMLPurifier_AttrDef_HTML_LinkTypes($name) { + if (!isset($this->configLookup[$name])) { + trigger_error('Unrecognized attribute name for link '. + 'relationship.', E_USER_ERROR); + return; + } + $this->name = $this->configLookup[$name]; + } + + function validate($string, $config, &$context) { + + $allowed = $config->get('Attr', $this->name); + if (empty($allowed)) return false; + + $string = $this->parseCDATA($string); + $parts = explode(' ', $string); + + // lookup to prevent duplicates + $ret_lookup = array(); + foreach ($parts as $part) { + $part = strtolower(trim($part)); + if (!isset($allowed[$part])) continue; + $ret_lookup[$part] = true; + } + + if (empty($ret_lookup)) return false; + + $ret_array = array(); + foreach ($ret_lookup as $part => $bool) $ret_array[] = $part; + $string = implode(' ', $ret_array); + + return $string; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php new file mode 100644 index 0000000000..f50259b6fd --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php @@ -0,0 +1,44 @@ + \ No newline 
at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php new file mode 100644 index 0000000000..1eaeaa7e6a --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -0,0 +1,51 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php new file mode 100644 index 0000000000..4c29091254 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php @@ -0,0 +1,37 @@ + + // WARNING, above link WILL crash you if you're using Windows + + if ($int > 1200) return '1200'; + + return (string) $int; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php new file mode 100644 index 0000000000..d6953d6165 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php @@ -0,0 +1,75 @@ +negative = $negative; + $this->zero = $zero; + $this->positive = $positive; + } + + function validate($integer, $config, &$context) { + + $integer = $this->parseCDATA($integer); + if ($integer === '') return false; + + // we could possibly simply typecast it to integer, but there are + // certain fringe cases that must not return an integer. 
+ + // clip leading sign + if ( $this->negative && $integer[0] === '-' ) { + $digits = substr($integer, 1); + if ($digits === '0') $integer = '0'; // rm minus sign for zero + } elseif( $this->positive && $integer[0] === '+' ) { + $digits = $integer = substr($integer, 1); // rm unnecessary plus + } else { + $digits = $integer; + } + + // test if it's numeric + if (!ctype_digit($digits)) return false; + + // perform scope tests + if (!$this->zero && $integer == 0) return false; + if (!$this->positive && $integer > 0) return false; + if (!$this->negative && $integer < 0) return false; + + return $integer; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php new file mode 100644 index 0000000000..72d67f643c --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php @@ -0,0 +1,75 @@ + 8 || !ctype_alnum($subtags[1])) { + return $new_string; + } + if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); + + $new_string .= '-' . $subtags[1]; + if ($num_subtags == 2) return $new_string; + + // process all other subtags, index 2 and up + for ($i = 2; $i < $num_subtags; $i++) { + $length = strlen($subtags[$i]); + if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) { + return $new_string; + } + if (!ctype_lower($subtags[$i])) { + $subtags[$i] = strtolower($subtags[$i]); + } + $new_string .= '-' . 
$subtags[$i]; + } + + return $new_string; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php new file mode 100644 index 0000000000..eb2a24a711 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php @@ -0,0 +1,17 @@ +parseCDATA($string); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php new file mode 100644 index 0000000000..7102718136 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php @@ -0,0 +1,296 @@ +http://www.google.com/url?q=%s). '. + 'This prevents PageRank leaks, while being as transparent as possible '. + 'to users (you may also want to add some client side JavaScript to '. + 'override the text in the statusbar). Warning: many security experts '. + 'believe that this form of protection does not deter spam-bots. '. + 'You can also use this directive to redirect users to a splash page '. + 'telling them they are leaving your website. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'HostBlacklist', array(), 'list', + 'List of strings that are forbidden in the host of any URI. Use it to '. + 'kill domain names of spam, etc. Note that it will catch anything in '. + 'the domain, so moo.com will catch moo.com.example.com. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'Disable', false, 'bool', + 'Disables all URIs in all forms. Not sure why you\'d want to do that '. + '(after all, the Internet\'s founded on the notion of a hyperlink). '. + 'This directive has been available since 1.3.0.' +); +HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable'); + +/** + * Validates a URI as defined by RFC 3986. 
+ * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme + */ +class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef +{ + + var $host; + var $PercentEncoder; + var $embeds_resource; + + /** + * @param $embeds_resource_resource Does the URI here result in an extra HTTP request? + */ + function HTMLPurifier_AttrDef_URI($embeds_resource = false) { + $this->host = new HTMLPurifier_AttrDef_URI_Host(); + $this->PercentEncoder = new HTMLPurifier_PercentEncoder(); + $this->embeds_resource = (bool) $embeds_resource; + } + + function validate($uri, $config, &$context) { + + // We'll write stack-based parsers later, for now, use regexps to + // get things working as fast as possible (irony) + + if ($config->get('URI', 'Disable')) return false; + + // parse as CDATA + $uri = $this->parseCDATA($uri); + + // fix up percent-encoding + $uri = $this->PercentEncoder->normalize($uri); + + // while it would be nice to use parse_url(), that's specifically + // for HTTP and thus won't work for our generic URI parsing + + // according to the RFC... (but this cuts corners, i.e. non-validating) + $r_URI = '!'. + '(([^:/?#<>\'"]+):)?'. // 2. Scheme + '(//([^/?#<>\'"]*))?'. // 4. Authority + '([^?#<>\'"]*)'. // 5. Path + '(\?([^#<>\'"]*))?'. // 7. Query + '(#([^<>\'"]*))?'. // 8. Fragment + '!'; + + $matches = array(); + $result = preg_match($r_URI, $uri, $matches); + + if (!$result) return false; // invalid URI + + // seperate out parts + $scheme = !empty($matches[1]) ? $matches[2] : null; + $authority = !empty($matches[3]) ? $matches[4] : null; + $path = $matches[5]; // always present, can be empty + $query = !empty($matches[6]) ? $matches[7] : null; + $fragment = !empty($matches[8]) ? $matches[9] : null; + + + + $registry =& HTMLPurifier_URISchemeRegistry::instance(); + if ($scheme !== null) { + // no need to validate the scheme's fmt since we do that when we + // retrieve the specific scheme object from the registry + $scheme = ctype_lower($scheme) ? 
$scheme : strtolower($scheme); + $scheme_obj = $registry->getScheme($scheme, $config, $context); + if (!$scheme_obj) return false; // invalid scheme, clean it out + } else { + $scheme_obj = $registry->getScheme( + $config->get('URI', 'DefaultScheme'), $config, $context + ); + } + + + // the URI we're processing embeds_resource a resource in the page, but the URI + // it references cannot be located + if ($this->embeds_resource && !$scheme_obj->browsable) { + return false; + } + + + if ($authority !== null) { + + // remove URI if it's absolute and we disabled externals or + // if it's absolute and embedded and we disabled external resources + unset($our_host); + if ( + $config->get('URI', 'DisableExternal') || + ( + $config->get('URI', 'DisableExternalResources') && + $this->embeds_resource + ) + ) { + $our_host = $config->get('URI', 'Host'); + if ($our_host === null) return false; + } + + $HEXDIG = '[A-Fa-f0-9]'; + $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] + $sub_delims = '!$&\'()'; // needs [] + $pct_encoded = "%$HEXDIG$HEXDIG"; + $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*"; + $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; + $matches = array(); + preg_match($r_authority, $authority, $matches); + // overloads regexp! + $userinfo = !empty($matches[1]) ? $matches[2] : null; + $host = !empty($matches[3]) ? $matches[3] : null; + $port = !empty($matches[4]) ? 
$matches[5] : null; + + // validate port + if ($port !== null) { + $port = (int) $port; + if ($port < 1 || $port > 65535) $port = null; + } + + $host = $this->host->validate($host, $config, $context); + if ($host === false) $host = null; + + if ($this->checkBlacklist($host, $config, $context)) return false; + + // more lenient absolute checking + if (isset($our_host)) { + $host_parts = array_reverse(explode('.', $host)); + // could be cached + $our_host_parts = array_reverse(explode('.', $our_host)); + foreach ($our_host_parts as $i => $discard) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $our_host_parts[$i]) return false; + } + } + + // userinfo and host are validated within the regexp + + } else { + $port = $host = $userinfo = null; + } + + + // query and fragment are quite simple in terms of definition: + // *( pchar / "/" / "?" ), so define their validation routines + // when we start fixing percent encoding + + + + // path gets to be validated against a hodge-podge of rules depending + // on the status of authority and scheme, but it's not that important, + // esp. since it won't be applicable to everyone + + + + // okay, now we defer execution to the subobject for more processing + // note that $fragment is omitted + list($userinfo, $host, $port, $path, $query) = + $scheme_obj->validateComponents( + $userinfo, $host, $port, $path, $query, $config, $context + ); + + + // reconstruct authority + $authority = null; + if (!is_null($userinfo) || !is_null($host) || !is_null($port)) { + $authority = ''; + if($userinfo !== null) $authority .= $userinfo . '@'; + $authority .= $host; + if($port !== null) $authority .= ':' . 
$port; + } + + // reconstruct the result + $result = ''; + if ($scheme !== null) $result .= "$scheme:"; + if ($authority !== null) $result .= "//$authority"; + $result .= $path; + if ($query !== null) $result .= "?$query"; + if ($fragment !== null) $result .= "#$fragment"; + + // munge if necessary + $munge = $config->get('URI', 'Munge'); + if (!empty($scheme_obj->browsable) && $munge !== null) { + if ($authority !== null) { + $result = str_replace('%s', rawurlencode($result), $munge); + } + } + + return $result; + + } + + /** + * Checks a host against an array blacklist + * @param $host Host to check + * @param $config HTMLPurifier_Config instance + * @param $context HTMLPurifier_Context instance + * @return bool Is spam? + */ + function checkBlacklist($host, &$config, &$context) { + $blacklist = $config->get('URI', 'HostBlacklist'); + if (!empty($blacklist)) { + foreach($blacklist as $blacklisted_host_fragment) { + if (strpos($host, $blacklisted_host_fragment) !== false) { + return true; + } + } + } + return false; + } + +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php new file mode 100644 index 0000000000..80b8d367e1 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php new file mode 100644 index 0000000000..e35b1b4b28 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php @@ -0,0 +1,23 @@ +" + // that needs more percent encoding to be done + if ($string == '') return false; + $string = trim($string); + $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string); + return $result ? 
$string : false; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php new file mode 100644 index 0000000000..5344cdac25 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php @@ -0,0 +1,54 @@ +ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); + $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); + } + + function validate($string, $config, &$context) { + $length = strlen($string); + if ($string === '') return ''; + if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { + //IPv6 + $ip = substr($string, 1, $length - 2); + $valid = $this->ipv6->validate($ip, $config, $context); + if ($valid === false) return false; + return '['. $valid . ']'; + } + + // need to do checks on unusual encodings too + $ipv4 = $this->ipv4->validate($string, $config, $context); + if ($ipv4 !== false) return $ipv4; + + // validate a domain name here, do filtering, etc etc etc + + // We could use this, but it would break I18N domain names + //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string); + //if (!$match) return false; + + return $string; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php new file mode 100644 index 0000000000..0730bbc8ac --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php @@ -0,0 +1,36 @@ +ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; + } + + function validate($aIP, $config, &$context) { + + if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) + { + return $aIP; + } + + return false; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php new file mode 100644 index 0000000000..73f085e55e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php @@ -0,0 +1,99 @@ +ip4 . 
'$#s', $aIP, $find)) + { + $aIP = substr($aIP, 0, 0-strlen($find[0])); + $ip = explode('.', $find[0]); + $ip = array_map('dechex', $ip); + $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; + unset($find, $ip); + } + + // compression check + $aIP = explode('::', $aIP); + $c = count($aIP); + if ($c > 2) + { + return false; + } + elseif ($c == 2) + { + list($first, $second) = $aIP; + $first = explode(':', $first); + $second = explode(':', $second); + + if (count($first) + count($second) > 8) + { + return false; + } + + while(count($first) < 8) + { + array_push($first, '0'); + } + + array_splice($first, 8 - count($second), 8, $second); + $aIP = $first; + unset($first,$second); + } + else + { + $aIP = explode(':', $aIP[0]); + } + $c = count($aIP); + + if ($c != 8) + { + return false; + } + + // All the pieces should be 16-bit hex strings. Are they? + foreach ($aIP as $piece) + { + if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) + { + return false; + } + } + + return $original; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php new file mode 100644 index 0000000000..3513669ae1 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php @@ -0,0 +1,34 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php new file mode 100644 index 0000000000..0ea5eb6dc2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php @@ -0,0 +1,31 @@ +get('Attr', 'DefaultTextDir'); + return $attr; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php new file mode 100644 index 0000000000..abfd03427d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php @@ -0,0 +1,28 @@ + \ No newline at end of file diff --git 
a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php new file mode 100644 index 0000000000..0b745d3045 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php @@ -0,0 +1,28 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php new file mode 100644 index 0000000000..4ff356d889 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php @@ -0,0 +1,50 @@ +get('Attr', 'DefaultInvalidImage'); + $src = false; + } + + if (!isset($attr['alt'])) { + if ($src) { + $attr['alt'] = basename($attr['src']); + } else { + $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt'); + } + } + + return $attr; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php new file mode 100644 index 0000000000..acb1786ae9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php @@ -0,0 +1,30 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php new file mode 100644 index 0000000000..16d3d1d8ca --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php @@ -0,0 +1,33 @@ +name = $name; + $this->cssName = $css_name ? $css_name : $name; + } + + function transform($attr, $config, &$context) { + if (!isset($attr[$this->name])) return $attr; + $length = $attr[$this->name]; + unset($attr[$this->name]); + if(ctype_digit($length)) $length .= 'px'; + + $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; + $attr['style'] = $this->cssName . ":$length;" . 
$attr['style']; + + return $attr; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php new file mode 100644 index 0000000000..0f815b69e3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php @@ -0,0 +1,31 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php new file mode 100644 index 0000000000..09088fe176 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php @@ -0,0 +1,36 @@ + 1, + 'right' => 1, + 'center' => 1, + 'justify' => 1); + + if (!isset($values[$align])) { + return $attr; + } + + $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; + $attr['style'] = "text-align:$align;" . $attr['style']; + + return $attr; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTypes.php b/lib/htmlpurifier/HTMLPurifier/AttrTypes.php new file mode 100644 index 0000000000..e13d0d3005 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/AttrTypes.php @@ -0,0 +1,41 @@ +info['CDATA'] = new HTMLPurifier_AttrDef_Text(); + $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID(); + $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length(); + $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength(); + $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens(); + $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels(); + $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); + $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); + + // number is really a positive integer (one or more digits) + $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); + } +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php new file mode 100644 index 0000000000..5de49b69b3 --- 
/dev/null +++ b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php @@ -0,0 +1,213 @@ +info['text-align'] = new HTMLPurifier_AttrDef_Enum( + array('left', 'right', 'center', 'justify'), false); + + $border_style = + $this->info['border-bottom-style'] = + $this->info['border-right-style'] = + $this->info['border-left-style'] = + $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double', + 'groove', 'ridge', 'inset', 'outset'), false); + + $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); + + $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'left', 'right', 'both'), false); + $this->info['float'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'left', 'right'), false); + $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'italic', 'oblique'), false); + $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'small-caps'), false); + + $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_Enum(array('none')), + new HTMLPurifier_AttrDef_CSS_URI() + ) + ); + + $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( + array('inside', 'outside'), false); + $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( + array('disc', 'circle', 'square', 'decimal', 'lower-roman', + 'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false); + $this->info['list-style-image'] = $uri_or_none; + + $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); + + $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( + array('capitalize', 'uppercase', 'lowercase', 'none'), false); + $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); + + $this->info['background-image'] = $uri_or_none; + $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( + array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') + ); + 
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( + array('scroll', 'fixed') + ); + $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); + + $border_color = + $this->info['border-top-color'] = + $this->info['border-bottom-color'] = + $this->info['border-left-color'] = + $this->info['border-right-color'] = + $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('transparent')), + new HTMLPurifier_AttrDef_CSS_Color() + )); + + $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); + + $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); + + $border_width = + $this->info['border-top-width'] = + $this->info['border-bottom-width'] = + $this->info['border-left-width'] = + $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), + new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative + )); + + $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); + + $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small', + 'small', 'medium', 'large', 'x-large', 'xx-large', + 'larger', 'smaller')), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new HTMLPurifier_AttrDef_CSS_Number(true), // no 
negatives + new HTMLPurifier_AttrDef_CSS_Length(true), + new HTMLPurifier_AttrDef_CSS_Percentage(true) + )); + + $margin = + $this->info['margin-top'] = + $this->info['margin-bottom'] = + $this->info['margin-left'] = + $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_Enum(array('auto')) + )); + + $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); + + // non-negative + $padding = + $this->info['padding-top'] = + $this->info['padding-bottom'] = + $this->info['padding-left'] = + $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(true), + new HTMLPurifier_AttrDef_CSS_Percentage(true) + )); + + $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); + + $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage() + )); + + $this->info['width'] = + $this->info['height'] = + new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(true), + new HTMLPurifier_AttrDef_CSS_Percentage(true), + new HTMLPurifier_AttrDef_Enum(array('auto')) + )); + + $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); + + $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); + + // this could use specialized code + $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300', + '400', '500', '600', '700', '800', '900'), false); + + // MUST be called after other font properties, as it references + // a CSSDefinition object + $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); + + // same here + $this->info['border'] = + $this->info['border-bottom'] = + $this->info['border-top'] = + $this->info['border-left'] = + 
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); + + $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array( + 'collapse', 'seperate')); + + $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array( + 'top', 'bottom')); + + $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array( + 'auto', 'fixed')); + + $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super', + 'top', 'text-top', 'middle', 'bottom', 'text-bottom')), + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage() + )); + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef.php b/lib/htmlpurifier/HTMLPurifier/ChildDef.php new file mode 100644 index 0000000000..bed43cacd3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef.php @@ -0,0 +1,55 @@ + diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php new file mode 100644 index 0000000000..afe0299fa7 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php @@ -0,0 +1,51 @@ +inline = new HTMLPurifier_ChildDef_Optional($inline); + $this->block = new HTMLPurifier_ChildDef_Optional($block); + } + + function validateChildren($tokens_of_children, $config, &$context) { + if ($context->get('IsInline') === false) { + return $this->block->validateChildren( + $tokens_of_children, $config, $context); + } else { + return $this->inline->validateChildren( + $tokens_of_children, $config, $context); + } + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php new file mode 100644 index 0000000000..de18cd7070 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php @@ -0,0 +1,75 @@ +dtd_regex = $dtd_regex; + $this->_compileRegex(); + } + /** + * Compiles the PCRE 
regex from a DTD regex ($dtd_regex to $_pcre_regex) + */ + function _compileRegex() { + $raw = str_replace(' ', '', $this->dtd_regex); + if ($raw{0} != '(') { + $raw = "($raw)"; + } + $reg = str_replace(',', ',?', $raw); + $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); + $this->_pcre_regex = $reg; + } + function validateChildren($tokens_of_children, $config, &$context) { + $list_of_children = ''; + $nesting = 0; // depth into the nest + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) continue; + + $is_child = ($nesting == 0); // direct + + if ($token->type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + if ($is_child) { + $list_of_children .= $token->name . ','; + } + } + $list_of_children = rtrim($list_of_children, ','); + + $okay = + preg_match( + '/^'.$this->_pcre_regex.'$/', + $list_of_children + ); + + return (bool) $okay; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php new file mode 100644 index 0000000000..1ab4fdd657 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php @@ -0,0 +1,22 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php new file mode 100644 index 0000000000..cc8883263e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php @@ -0,0 +1,23 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php new file mode 100644 index 0000000000..c6f706e29a --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php @@ -0,0 +1,107 @@ + $x) { + $elements[$i] = true; + if (empty($i)) unset($elements[$i]); + } + } + $this->elements = $elements; + $this->gen = new HTMLPurifier_Generator(); + } + var $allow_empty = false; + var $type = 
'required'; + function validateChildren($tokens_of_children, $config, &$context) { + // if there are no tokens, delete parent node + if (empty($tokens_of_children)) return false; + + // the new set of children + $result = array(); + + // current depth into the nest + $nesting = 0; + + // whether or not we're deleting a node + $is_deleting = false; + + // whether or not parsed character data is allowed + // this controls whether or not we silently drop a tag + // or generate escaped HTML from it + $pcdata_allowed = isset($this->elements['#PCDATA']); + + // a little sanity check to make sure it's not ALL whitespace + $all_whitespace = true; + + // some configuration + $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); + + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) { + $result[] = $token; + continue; + } + $all_whitespace = false; // phew, we're not talking about whitespace + + $is_child = ($nesting == 0); + + if ($token->type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + if ($is_child) { + $is_deleting = false; + if (!isset($this->elements[$token->name])) { + $is_deleting = true; + if ($pcdata_allowed && $token->type == 'text') { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = new HTMLPurifier_Token_Text( + $this->gen->generateFromToken($token, $config) + ); + } + continue; + } + } + if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = + new HTMLPurifier_Token_Text( + $this->gen->generateFromToken( $token, $config ) + ); + } else { + // drop silently + } + } + if (empty($result)) return false; + if ($all_whitespace) return false; + if ($tokens_of_children == $result) return true; + return $result; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php 
b/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php new file mode 100644 index 0000000000..9280a9f50a --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -0,0 +1,76 @@ +getHTMLDefinition(); + if (!$this->init) { + // allow all inline elements + $this->real_elements = $this->elements; + $this->fake_elements = $def->info_content_sets['Flow']; + $this->fake_elements['#PCDATA'] = true; + $this->init = true; + } + + // trick the parent class into thinking it allows more + $this->elements = $this->fake_elements; + $result = parent::validateChildren($tokens_of_children, $config, $context); + $this->elements = $this->real_elements; + + if ($result === false) return array(); + if ($result === true) $result = $tokens_of_children; + + $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); + $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); + $is_inline = false; + $depth = 0; + $ret = array(); + + // assuming that there are no comment tokens + foreach ($result as $i => $token) { + $token = $result[$i]; + // ifs are nested for readability + if (!$is_inline) { + if (!$depth) { + if ( + $token->type == 'text' || + !isset($this->elements[$token->name]) + ) { + $is_inline = true; + $ret[] = $block_wrap_start; + } + } + } else { + if (!$depth) { + // starting tokens have been inline text / empty + if ($token->type == 'start' || $token->type == 'empty') { + if (isset($this->elements[$token->name])) { + // ended + $ret[] = $block_wrap_end; + $is_inline = false; + } + } + } + } + $ret[] = $token; + if ($token->type == 'start') $depth++; + if ($token->type == 'end') $depth--; + } + if ($is_inline) $ret[] = $block_wrap_end; + return $ret; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php new file mode 100644 index 0000000000..3534cdd0a6 --- /dev/null +++ 
b/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php @@ -0,0 +1,142 @@ +type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + // handle node collection + if ($is_collecting) { + if ($is_child) { + // okay, let's stash the tokens away + // first token tells us the type of the collection + switch ($collection[$tag_index]->name) { + case 'tr': + case 'tbody': + $content[] = $collection; + break; + case 'caption': + if ($caption !== false) break; + $caption = $collection; + break; + case 'thead': + case 'tfoot': + // access the appropriate variable, $thead or $tfoot + $var = $collection[$tag_index]->name; + if ($$var === false) { + $$var = $collection; + } else { + // transmutate the first and less entries into + // tbody tags, and then put into content + $collection[$tag_index]->name = 'tbody'; + $collection[count($collection)-1]->name = 'tbody'; + $content[] = $collection; + } + break; + case 'colgroup': + $cols[] = $collection; + break; + } + $collection = array(); + $is_collecting = false; + $tag_index = 0; + } else { + // add the node to the collection + $collection[] = $token; + } + } + + // terminate + if ($token === false) break; + + if ($is_child) { + // determine what we're dealing with + if ($token->name == 'col') { + // the only empty tag in the possie, we can handle it + // immediately + $cols[] = array_merge($collection, array($token)); + $collection = array(); + $tag_index = 0; + continue; + } + switch($token->name) { + case 'caption': + case 'colgroup': + case 'thead': + case 'tfoot': + case 'tbody': + case 'tr': + $is_collecting = true; + $collection[] = $token; + continue; + default: + if ($token->type == 'text' && $token->is_whitespace) { + $collection[] = $token; + $tag_index++; + } + continue; + } + } + } + + if (empty($content)) return false; + + $ret = array(); + if ($caption !== false) $ret = array_merge($ret, $caption); + if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, 
$token_array); + if ($thead !== false) $ret = array_merge($ret, $thead); + if ($tfoot !== false) $ret = array_merge($ret, $tfoot); + foreach ($content as $token_array) $ret = array_merge($ret, $token_array); + if (!empty($collection) && $is_collecting == false){ + // grab the trailing space + $ret = array_merge($ret, $collection); + } + + array_pop($tokens_of_children); // remove phantom token + + return ($ret === $tokens_of_children) ? true : $ret; + + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php new file mode 100644 index 0000000000..c94e01f636 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Config.php @@ -0,0 +1,222 @@ +conf = $definition->defaults; // set up, copy in defaults + $this->def = $definition; // keep a copy around for checking + } + + /** + * Convenience constructor that creates a config object based on a mixed var + * @static + * @param mixed $config Variable that defines the state of the config + * object. Can be: a HTMLPurifier_Config() object, + * an array of directives based on loadArray(), + * or a string filename of an ini file. + * @return Configured HTMLPurifier_Config object + */ + function create($config) { + if (is_a($config, 'HTMLPurifier_Config')) return $config; + $ret = HTMLPurifier_Config::createDefault(); + if (is_string($config)) $ret->loadIni($config); + elseif (is_array($config)) $ret->loadArray($config); + return $ret; + } + + /** + * Convenience constructor that creates a default configuration object. + * @static + * @return Default HTMLPurifier_Config object. + */ + function createDefault() { + $definition =& HTMLPurifier_ConfigSchema::instance(); + $config = new HTMLPurifier_Config($definition); + return $config; + } + + /** + * Retreives a value from the configuration. 
+ * @param $namespace String namespace + * @param $key String key + */ + function get($namespace, $key, $from_alias = false) { + if (!isset($this->def->info[$namespace][$key])) { + trigger_error('Cannot retrieve value of undefined directive', + E_USER_WARNING); + return; + } + if ($this->def->info[$namespace][$key]->class == 'alias') { + trigger_error('Cannot get value from aliased directive, use real name', + E_USER_ERROR); + return; + } + return $this->conf[$namespace][$key]; + } + + /** + * Retreives an array of directives to values from a given namespace + * @param $namespace String namespace + */ + function getBatch($namespace) { + if (!isset($this->def->info[$namespace])) { + trigger_error('Cannot retrieve undefined namespace', + E_USER_WARNING); + return; + } + return $this->conf[$namespace]; + } + + /** + * Sets a value to configuration. + * @param $namespace String namespace + * @param $key String key + * @param $value Mixed value + */ + function set($namespace, $key, $value, $from_alias = false) { + if (!isset($this->def->info[$namespace][$key])) { + trigger_error('Cannot set undefined directive to value', + E_USER_WARNING); + return; + } + if ($this->def->info[$namespace][$key]->class == 'alias') { + if ($from_alias) { + trigger_error('Double-aliases not allowed, please fix '. 
+ 'ConfigSchema bug'); + } + $this->set($this->def->info[$namespace][$key]->namespace, + $this->def->info[$namespace][$key]->name, + $value, true); + return; + } + $value = $this->def->validate( + $value, + $this->def->info[$namespace][$key]->type, + $this->def->info[$namespace][$key]->allow_null + ); + if (is_string($value)) { + // resolve value alias if defined + if (isset($this->def->info[$namespace][$key]->aliases[$value])) { + $value = $this->def->info[$namespace][$key]->aliases[$value]; + } + if ($this->def->info[$namespace][$key]->allowed !== true) { + // check to see if the value is allowed + if (!isset($this->def->info[$namespace][$key]->allowed[$value])) { + trigger_error('Value not supported', E_USER_WARNING); + return; + } + } + } + if ($this->def->isError($value)) { + trigger_error('Value is of invalid type', E_USER_WARNING); + return; + } + $this->conf[$namespace][$key] = $value; + if ($namespace == 'HTML' || $namespace == 'Attr') { + // reset HTML definition if relevant attributes changed + $this->html_definition = null; + } + if ($namespace == 'CSS') { + $this->css_definition = null; + } + } + + /** + * Retrieves reference to the HTML definition. + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + */ + function &getHTMLDefinition($raw = false) { + if ( + empty($this->html_definition) || // hasn't ever been setup + ($raw && $this->html_definition->setup) // requesting new one + ) { + $this->html_definition = new HTMLPurifier_HTMLDefinition($this); + if ($raw) return $this->html_definition; // no setup! 
+ } + if (!$this->html_definition->setup) $this->html_definition->setup(); + return $this->html_definition; + } + + /** + * Retrieves reference to the CSS definition + */ + function &getCSSDefinition() { + if ($this->css_definition === null) { + $this->css_definition = new HTMLPurifier_CSSDefinition(); + $this->css_definition->setup($this); + } + return $this->css_definition; + } + + /** + * Loads configuration values from an array with the following structure: + * Namespace.Directive => Value + * @param $config_array Configuration associative array + */ + function loadArray($config_array) { + foreach ($config_array as $key => $value) { + $key = str_replace('_', '.', $key); + if (strpos($key, '.') !== false) { + // condensed form + list($namespace, $directive) = explode('.', $key); + $this->set($namespace, $directive, $value); + } else { + $namespace = $key; + $namespace_values = $value; + foreach ($namespace_values as $directive => $value) { + $this->set($namespace, $directive, $value); + } + } + } + } + + /** + * Loads configuration values from an ini file + * @param $filename Name of ini file + */ + function loadIni($filename) { + $array = parse_ini_file($filename, true); + $this->loadArray($array); + } + +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef.php new file mode 100644 index 0000000000..b92640dc61 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef.php @@ -0,0 +1,10 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php new file mode 100644 index 0000000000..39026540b3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php @@ -0,0 +1,74 @@ +type = $type; + if ($descriptions !== null) $this->descriptions = $descriptions; + if ( $allow_null !== null) $this->allow_null = $allow_null; + if ( $allowed !== null) $this->allowed = $allowed; + if ( $aliases !== null) 
$this->aliases = $aliases; + } + + /** + * Allowed type of the directive. Values are: + * - string + * - istring (case insensitive string) + * - int + * - float + * - bool + * - lookup (array of value => true) + * - list (regular numbered index array) + * - hash (array of key => value) + * - mixed (anything goes) + */ + var $type = 'mixed'; + + /** + * Plaintext descriptions of the configuration entity is. Organized by + * file and line number, so multiple descriptions are allowed. + */ + var $descriptions = array(); + + /** + * Is null allowed? Has no effect for mixed type. + * @bool + */ + var $allow_null = false; + + /** + * Lookup table of allowed values of the element, bool true if all allowed. + */ + var $allowed = true; + + /** + * Hash of value aliases, i.e. values that are equivalent. + */ + var $aliases = array(); + + /** + * Adds a description to the array + */ + function addDescription($file, $line, $description) { + if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array(); + $this->descriptions[$file][$line] = $description; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php new file mode 100644 index 0000000000..81a4451413 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php @@ -0,0 +1,27 @@ +namespace = $namespace; + $this->name = $name; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php new file mode 100644 index 0000000000..f53892b47e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php @@ -0,0 +1,23 @@ +description = $description; + } + + var $class = 'namespace'; + + /** + * String description of what kinds of directives go in this namespace. 
+     */
+    var $description;
+
+}
+
+?>
\ No newline at end of file
diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
new file mode 100644
index 0000000000..9f1f3e3eb3
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
@@ -0,0 +1,386 @@
+        'String',
+        'istring' => 'Case-insensitive string',
+        'int' => 'Integer',
+        'float' => 'Float',
+        'bool' => 'Boolean',
+        'lookup' => 'Lookup array',
+        'list' => 'Array list',
+        'hash' => 'Associative array',
+        'mixed' => 'Mixed'
+    );
+
+    /**
+     * Initializes the default namespaces (Core, Attr, URI, HTML, CSS, Test).
+     */
+    function initialize() {
+        $this->defineNamespace('Core', 'Core features that are always available.');
+        $this->defineNamespace('Attr', 'Features regarding attribute validation.');
+        $this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
+        $this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
+        $this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
+        $this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
+    }
+
+    /**
+     * Retrieves an instance of the application-wide configuration definition.
+     * @static
+     * @param $prototype Optional. A schema object to install as the shared
+     *                   instance, or boolean true to discard the current
+     *                   instance and build a freshly initialized one.
+     * @return Reference to the shared HTMLPurifier_ConfigSchema
+     */
+    function &instance($prototype = null) {
+        static $instance;
+        // true is a reset request, not a prototype: without the second test
+        // the boolean itself was stored as the schema and the reset branch
+        // below could never run
+        if ($prototype !== null && $prototype !== true) {
+            $instance = $prototype;
+        } elseif ($instance === null || $prototype === true) {
+            // assigned before initialize() so the static calls it makes
+            // back into instance() see this object
+            $instance = new HTMLPurifier_ConfigSchema();
+            $instance->initialize();
+        }
+        return $instance;
+    }
+
+    /**
+     * Defines a directive for configuration
+     * @static
+     * @warning Will fail if directive's namespace is undefined
+     * @param $namespace Namespace the directive is in
+     * @param $name Key of directive
+     * @param $default Default value of directive
+     * @param $type Allowed type of the directive.
See
+     *              HTMLPurifier_ConfigDef_Directive::$type for allowed
+     *              values; may carry a '/null' modifier (e.g. 'string/null')
+     *              to permit null
+     * @param $description Description of directive for documentation
+     */
+    function define(
+        $namespace, $name, $default, $type,
+        $description
+    ) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace])) {
+            trigger_error('Cannot define directive for undefined namespace',
+                E_USER_ERROR);
+            return;
+        }
+        if (!ctype_alnum($name)) {
+            trigger_error('Directive name must be alphanumeric',
+                E_USER_ERROR);
+            return;
+        }
+        if (empty($description)) {
+            trigger_error('Description must be non-empty',
+                E_USER_ERROR);
+            return;
+        }
+        // process modifiers up front: the stored type is the base type, so
+        // comparing it against the raw 'type/modifier' string would flag
+        // every identical re-definition of a '/null' directive as inconsistent
+        $type_values = explode('/', $type, 2);
+        $type = $type_values[0];
+        $modifier = isset($type_values[1]) ? $type_values[1] : false;
+        $allow_null = ($modifier === 'null');
+        if (isset($def->info[$namespace][$name])) {
+            // already defined: only an identical definition may be repeated
+            // (it then just contributes another description below)
+            if (
+                $def->info[$namespace][$name]->type !== $type ||
+                $def->info[$namespace][$name]->allow_null !== $allow_null ||
+                $def->defaults[$namespace][$name] !== $default
+            ) {
+                trigger_error('Inconsistent default or type, cannot redefine');
+                return;
+            }
+        } else {
+            if (!isset($def->types[$type])) {
+                trigger_error('Invalid type for configuration directive',
+                    E_USER_ERROR);
+                return;
+            }
+            $default = $def->validate($default, $type, $allow_null);
+            if ($def->isError($default)) {
+                trigger_error('Default value does not match directive type',
+                    E_USER_ERROR);
+                return;
+            }
+            $def->info[$namespace][$name] =
+                new HTMLPurifier_ConfigDef_Directive();
+            $def->info[$namespace][$name]->type = $type;
+            $def->info[$namespace][$name]->allow_null = $allow_null;
+            $def->defaults[$namespace][$name] = $default;
+        }
+        // record where this definition was made, keyed by file and line
+        $backtrace = debug_backtrace();
+        $file = $def->mungeFilename($backtrace[0]['file']);
+        $line = $backtrace[0]['line'];
+        $def->info[$namespace][$name]->addDescription($file,$line,$description);
+    }
+
+    /**
+     * Defines a namespace for directives to be put into.
+     * @static
+     * @param $namespace Namespace's name
+     * @param $description Description of the namespace
+     */
+    function defineNamespace($namespace, $description) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (isset($def->info[$namespace])) {
+            trigger_error('Cannot redefine namespace', E_USER_ERROR);
+            return;
+        }
+        if (!ctype_alnum($namespace)) {
+            trigger_error('Namespace name must be alphanumeric',
+                E_USER_ERROR);
+            return;
+        }
+        if (empty($description)) {
+            trigger_error('Description must be non-empty',
+                E_USER_ERROR);
+            return;
+        }
+        $def->info[$namespace] = array();
+        $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
+        $def->info_namespace[$namespace]->description = $description;
+        $def->defaults[$namespace] = array();
+    }
+
+    /**
+     * Defines a directive value alias.
+     *
+     * Directive value aliases are convenient for developers because it lets
+     * them set a directive to several values and get the same result.
+     * @static
+     * @param $namespace Directive's namespace
+     * @param $name Name of Directive
+     * @param $aliases Hash of aliased value => real value the alias is
+     *                 converted into. When the directive has an allowed-value
+     *                 list, each real value must be in it and no alias may
+     *                 shadow an allowed value. Processing stops at the first
+     *                 offending pair; earlier pairs stay registered.
+     */
+    function defineValueAliases($namespace, $name, $aliases) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace][$name])) {
+            trigger_error('Cannot set value alias for non-existant directive',
+                E_USER_ERROR);
+            return;
+        }
+        foreach ($aliases as $alias => $real) {
+            // allowed === true means "anything goes", so the target only has
+            // to be checked when an explicit lookup table exists. (The test
+            // used to negate the directive object itself, which is always
+            // unequal to true and rejected every alias in that case.)
+            if ($def->info[$namespace][$name]->allowed !== true &&
+                !isset($def->info[$namespace][$name]->allowed[$real])
+            ) {
+                trigger_error('Cannot define alias to value that is not allowed',
+                    E_USER_ERROR);
+                return;
+            }
+            if (isset($def->info[$namespace][$name]->allowed[$alias])) {
+                trigger_error('Cannot define alias over allowed value',
+                    E_USER_ERROR);
+                return;
+            }
+            $def->info[$namespace][$name]->aliases[$alias] = $real;
+        }
+    }
+
+    /**
+     * Defines a set of allowed values for a directive.
+     * Only directives of type string or istring may be restricted, and the
+     * directive's default (unless null) must be among the allowed values.
+     * @static
+     * @param $namespace Namespace of directive
+     * @param $name Name of directive
+     * @param $allowed_values Arraylist of allowed values
+     */
+    function defineAllowedValues($namespace, $name, $allowed_values) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace][$name])) {
+            trigger_error('Cannot define allowed values for undefined directive',
+                E_USER_ERROR);
+            return;
+        }
+        $directive =& $def->info[$namespace][$name];
+        $type = $directive->type;
+        if ($type != 'string' && $type != 'istring') {
+            trigger_error('Cannot define allowed values for directive whose type is not string',
+                E_USER_ERROR);
+            return;
+        }
+        // true means "no restriction yet"; switch to a lookup table
+        if ($directive->allowed === true) {
+            $directive->allowed = array();
+        }
+        foreach ($allowed_values as $value) {
+            $directive->allowed[$value] = true;
+        }
+        if ($def->defaults[$namespace][$name] !== null &&
+            !isset($directive->allowed[$def->defaults[$namespace][$name]])) {
+            trigger_error('Default value must be in allowed range of variables',
+                E_USER_ERROR);
+            $directive->allowed = true; // undo undo!
+            return;
+        }
+    }
+
+    /**
+     * Defines a directive alias for backwards compatibility
+     * @static
+     * @param $namespace Namespace the alias is created in (must exist)
+     * @param $name Directive that will be aliased
+     * @param $new_namespace Namespace of the directive the alias points to
+     * @param $new_name Directive that the alias will be to
+     */
+    function defineAlias($namespace, $name, $new_namespace, $new_name) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace])) {
+            trigger_error('Cannot define directive alias in undefined namespace',
+                E_USER_ERROR);
+            return;
+        }
+        if (!ctype_alnum($name)) {
+            trigger_error('Directive name must be alphanumeric',
+                E_USER_ERROR);
+            return;
+        }
+        if (isset($def->info[$namespace][$name])) {
+            trigger_error('Cannot define alias over directive',
+                E_USER_ERROR);
+            return;
+        }
+        if (!isset($def->info[$new_namespace][$new_name])) {
+            trigger_error('Cannot define alias to undefined directive',
+                E_USER_ERROR);
+            return;
+        }
+        // aliases must point at a real directive, never at another alias
+        if ($def->info[$new_namespace][$new_name]->class == 'alias') {
+            trigger_error('Cannot define alias to alias',
+                E_USER_ERROR);
+            return;
+        }
+        $def->info[$namespace][$name] =
+            new HTMLPurifier_ConfigDef_DirectiveAlias(
+                $new_namespace, $new_name);
+    }
+
+    /**
+     * Validate a variable according to type. Returns the (possibly converted) value, or an HTMLPurifier_Error object if invalid.
+ */ + function validate($var, $type, $allow_null = false) { + if (!isset($this->types[$type])) { + trigger_error('Invalid type', E_USER_ERROR); + return; + } + if ($allow_null && $var === null) return null; + switch ($type) { + case 'mixed': + return $var; + case 'istring': + case 'string': + if (!is_string($var)) break; + if ($type === 'istring') $var = strtolower($var); + return $var; + case 'int': + if (is_string($var) && ctype_digit($var)) $var = (int) $var; + elseif (!is_int($var)) break; + return $var; + case 'float': + if (is_string($var) && is_numeric($var)) $var = (float) $var; + elseif (!is_float($var)) break; + return $var; + case 'bool': + if (is_int($var) && ($var === 0 || $var === 1)) { + $var = (bool) $var; + } elseif (is_string($var)) { + if ($var == 'on' || $var == 'true' || $var == '1') { + $var = true; + } elseif ($var == 'off' || $var == 'false' || $var == '0') { + $var = false; + } else { + break; + } + } elseif (!is_bool($var)) break; + return $var; + case 'list': + case 'hash': + case 'lookup': + if (is_string($var)) { + // simplistic string to array method that only works + // for simple lists of tag names or alphanumeric characters + $var = explode(',',$var); + // remove spaces + foreach ($var as $i => $j) $var[$i] = trim($j); + } + if (!is_array($var)) break; + $keys = array_keys($var); + if ($keys === array_keys($keys)) { + if ($type == 'list') return $var; + elseif ($type == 'lookup') { + $new = array(); + foreach ($var as $key) { + $new[$key] = true; + } + return $new; + } else break; + } + if ($type === 'lookup') { + foreach ($var as $key => $value) { + $var[$key] = true; + } + } + return $var; + } + $error = new HTMLPurifier_Error(); + return $error; + } + + /** + * Takes an absolute path and munges it into a more manageable relative path + */ + function mungeFilename($filename) { + $offset = strrpos($filename, 'HTMLPurifier'); + $filename = substr($filename, $offset); + $filename = str_replace('\\', '/', $filename); + return 
$filename; + } + + /** + * Checks if var is an HTMLPurifier_Error object + */ + function isError($var) { + if (!is_object($var)) return false; + if (!is_a($var, 'HTMLPurifier_Error')) return false; + return true; + } +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/ContentSets.php b/lib/htmlpurifier/HTMLPurifier/ContentSets.php new file mode 100644 index 0000000000..de5c532e18 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ContentSets.php @@ -0,0 +1,148 @@ + true) indexed by name. + * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets + * @public + */ + var $lookup = array(); + + /** + * Synchronized list of defined content sets (keys of info) + */ + var $keys = array(); + /** + * Synchronized list of defined content values (values of info) + */ + var $values = array(); + + /** + * Merges in module's content sets, expands identifiers in the content + * sets and populates the keys, values and lookup member variables. + * @param $modules List of HTMLPurifier_HTMLModule + */ + function HTMLPurifier_ContentSets($modules) { + if (!is_array($modules)) $modules = array($modules); + // populate content_sets based on module hints + // sorry, no way of overloading + foreach ($modules as $module_i => $module) { + foreach ($module->content_sets as $key => $value) { + if (isset($this->info[$key])) { + // add it into the existing content set + $this->info[$key] = $this->info[$key] . ' | ' . 
$value; + } else { + $this->info[$key] = $value; + } + } + } + // perform content_set expansions + $this->keys = array_keys($this->info); + foreach ($this->info as $i => $set) { + // only performed once, so infinite recursion is not + // a problem + $this->info[$i] = + str_replace( + $this->keys, + // must be recalculated each time due to + // changing substitutions + array_values($this->info), + $set); + } + $this->values = array_values($this->info); + + // generate lookup tables + foreach ($this->info as $name => $set) { + $this->lookup[$name] = $this->convertToLookup($set); + } + } + + /** + * Accepts a definition; generates and assigns a ChildDef for it + * @param $def HTMLPurifier_ElementDef reference + * @param $module Module that defined the ElementDef + */ + function generateChildDef(&$def, $module) { + if (!empty($def->child)) return; // already done! + $content_model = $def->content_model; + if (is_string($content_model)) { + $def->content_model = str_replace( + $this->keys, $this->values, $content_model); + } + $def->child = $this->getChildDef($def, $module); + } + + /** + * Instantiates a ChildDef based on content_model and content_model_type + * member variables in HTMLPurifier_ElementDef + * @note This will also defer to modules for custom HTMLPurifier_ChildDef + * subclasses that need content set expansion + * @param $def HTMLPurifier_ElementDef to have ChildDef extracted + * @return HTMLPurifier_ChildDef corresponding to ElementDef + */ + function getChildDef($def, $module) { + $value = $def->content_model; + if (is_object($value)) { + trigger_error( + 'Literal object child definitions should be stored in '. 
+ 'ElementDef->child not ElementDef->content_model', + E_USER_NOTICE + ); + return $value; + } + switch ($def->content_model_type) { + case 'required': + return new HTMLPurifier_ChildDef_Required($value); + case 'optional': + return new HTMLPurifier_ChildDef_Optional($value); + case 'empty': + return new HTMLPurifier_ChildDef_Empty(); + case 'custom': + return new HTMLPurifier_ChildDef_Custom($value); + } + // defer to its module + $return = false; + if ($module->defines_child_def) { // save a func call + $return = $module->getChildDef($def); + } + if ($return !== false) return $return; + // error-out + trigger_error( + 'Could not determine which ChildDef class to instantiate', + E_USER_ERROR + ); + return false; + } + + /** + * Converts a string list of elements separated by pipes into + * a lookup array. + * @param $string List of elements + * @return Lookup array of elements + */ + function convertToLookup($string) { + $array = explode('|', str_replace(' ', '', $string)); + $ret = array(); + foreach ($array as $i => $k) { + $ret[$k] = true; + } + return $ret; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Context.php b/lib/htmlpurifier/HTMLPurifier/Context.php new file mode 100644 index 0000000000..ce6fe51e05 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Context.php @@ -0,0 +1,76 @@ +_storage[$name])) { + trigger_error('Name collision, cannot re-register', + E_USER_ERROR); + return; + } + $this->_storage[$name] =& $ref; + } + + /** + * Retrieves a variable reference from the context. + * @param $name String name + */ + function &get($name) { + if (!isset($this->_storage[$name])) { + trigger_error('Attempted to retrieve non-existent variable', + E_USER_ERROR); + $var = null; // so we can return by reference + return $var; + } + return $this->_storage[$name]; + } + + /** + * Destorys a variable in the context. 
+ * @param $name String name + */ + function destroy($name) { + if (!isset($this->_storage[$name])) { + trigger_error('Attempted to destroy non-existent variable', + E_USER_ERROR); + return; + } + unset($this->_storage[$name]); + } + + /** + * Checks whether or not the variable exists. + * @param $name String name + */ + function exists($name) { + return isset($this->_storage[$name]); + } + + /** + * Loads a series of variables from an associative array + * @param $context_array Assoc array of variables to load + */ + function loadArray(&$context_array) { + foreach ($context_array as $key => $discard) { + $this->register($key, $context_array[$key]); + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/ElementDef.php b/lib/htmlpurifier/HTMLPurifier/ElementDef.php new file mode 100644 index 0000000000..21bc5f36a3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php @@ -0,0 +1,122 @@ +setup(), this array may also + * contain an array at index 0 that indicates which attribute + * collections to load into the full array. It may also + * contain string indentifiers in lieu of HTMLPurifier_AttrDef, + * see HTMLPurifier_AttrTypes on how they are expanded during + * HTMLPurifier_HTMLDefinition->setup() processing. + * @public + */ + var $attr = array(); + + /** + * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation + * @public + */ + var $attr_transform_pre = array(); + + /** + * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation + * @public + */ + var $attr_transform_post = array(); + + + + /** + * HTMLPurifier_ChildDef of this tag. + * @public + */ + var $child; + + /** + * Abstract string representation of internal ChildDef rules. See + * HTMLPurifier_ContentSets for how this is parsed and then transformed + * into an HTMLPurifier_ChildDef. 
+     * @public
+     */
+    var $content_model;
+
+    /**
+     * Value of $child->type, used to determine which ChildDef to use,
+     * used in combination with $content_model.
+     * @public
+     */
+    var $content_model_type;
+
+
+
+    /**
+     * Lookup table of tags that close this tag. Used during parsing
+     * to make sure we don't attempt to nest unclosed tags.
+     * @public
+     */
+    var $auto_close = array();
+
+    /**
+     * Does the element have a content model (#PCDATA | Inline)*? This
+     * is important for chameleon ins and del processing in
+     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
+     * have to worry about this one.
+     * @public
+     */
+    var $descendants_are_inline;
+
+    /**
+     * Lookup table of tags excluded from all descendants of this tag.
+     * @public
+     */
+    var $excludes = array();
+
+    /**
+     * Merges the values of another element definition into this one.
+     * Values from the new element def take precedence if a value is
+     * not mergeable.
+     * @param $def Element definition to merge in. Its named attributes,
+     *             transforms, auto_close and excludes entries overwrite
+     *             same-keyed entries here; its attribute-collection includes
+     *             (attr index 0) are appended; its content_model is appended
+     *             with ' | '.
+     */
+    function mergeIn($def) {
+
+        // later keys takes precedence
+        foreach($def->attr as $k => $v) {
+            // strict test: a loose == 0 also matches string attribute names
+            // on PHP < 8, which would treat every attribute as the include list
+            if ($k === 0) {
+                // merge in the includes
+                // sorry, no way to override an include
+                foreach ($v as $v2) {
+                    // append to this definition; writing to $def->attr
+                    // dropped the includes instead of merging them
+                    $this->attr[0][] = $v2;
+                }
+                continue;
+            }
+            $this->attr[$k] = $v;
+        }
+        foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
+        foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
+        foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
+        foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
+
+        if(!is_null($def->child)) $this->child = $def->child;
+        if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
+        if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
+        if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
+
+    }
+
+}
+
+?>
diff --git a/lib/htmlpurifier/HTMLPurifier/Encoder.php b/lib/htmlpurifier/HTMLPurifier/Encoder.php
new file mode 100644
index 0000000000..1a22b4525c
--- /dev/null
+++ b/lib/htmlpurifier/HTMLPurifier/Encoder.php
@@ -0,0 +1,403 @@
+feature '.
+    'that automatically resolves all entities), making it pretty useless '.
+    'for anything except the most I18N-blind applications, although '.
+    '%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
+    'another tradeoff. This directive '.
+    'only accepts ISO-8859-1 if iconv is not enabled.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'EscapeNonASCIICharacters', false, 'bool',
+    'This directive overcomes a deficiency in %Core.Encoding by blindly '.
+    'converting all non-ASCII characters into decimal numeric entities before '.
+    'converting it to its native encoding. This means that even '.
+    'characters that can be expressed in the non-UTF-8 encoding will '.
+    'be entity-ized, which can be a real downer for encodings like Big5. '.
+    'It also assumes that the ASCII repetoire is available, although '.
+    'this is the case for almost all encodings. Anyway, use UTF-8! This '.
+    'directive has been available since 1.4.0.'
+);
+
+if ( !function_exists('iconv') ) {
+    // only encodings with native PHP support
+    HTMLPurifier_ConfigSchema::defineAllowedValues(
+        'Core', 'Encoding', array(
+            'utf-8',
+            'iso-8859-1'
+        )
+    );
+    HTMLPurifier_ConfigSchema::defineValueAliases(
+        'Core', 'Encoding', array(
+            'iso8859-1' => 'iso-8859-1'
+        )
+    );
+}
+
+HTMLPurifier_ConfigSchema::define(
+    'Test', 'ForceNoIconv', false, 'bool',
+    'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
+    'exist and use only pure PHP implementations.'
+); + +/** + * A UTF-8 specific character encoder that handles cleaning and transforming. + * @note All functions in this class should be static. + */ +class HTMLPurifier_Encoder +{ + + /** + * Constructor throws fatal error if you attempt to instantiate class + */ + function HTMLPurifier_Encoder() { + trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); + } + + /** + * Cleans a UTF-8 string for well-formedness and SGML validity + * + * It will parse according to UTF-8 and return a valid UTF8 string, with + * non-SGML codepoints excluded. + * + * @static + * @note Just for reference, the non-SGML code points are 0 to 31 and + * 127 to 159, inclusive. However, we allow code points 9, 10 + * and 13, which are the tab, line feed and carriage return + * respectively. 128 and above the code points map to multibyte + * UTF-8 representations. + * + * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and + * hsivonen@iki.fi at under the + * LGPL license. Notes on what changed are inside, but in general, + * the original code transformed UTF-8 text into an array of integer + * Unicode codepoints. Understandably, transforming that back to + * a string would be somewhat expensive, so the function was modded to + * directly operate on the string. However, this discourages code + * reuse, and the logic enumerated here would be useful for any + * function that needs to be able to understand UTF-8 characters. + * As of right now, only smart lossless character encoding converters + * would need that, and I'm probably not going to implement them. + * Once again, PHP 6 should solve all our problems. 
+     */
+    function cleanUTF8($str, $force_php = false) {
+
+        // lookup of characters to strip, built once per request
+        static $non_sgml_chars = array();
+        if (empty($non_sgml_chars)) {
+            for ($i = 0; $i <= 31; $i++) {
+                // non-SGML ASCII chars
+                // save \r, \t and \n
+                if ($i == 9 || $i == 13 || $i == 10) continue;
+                $non_sgml_chars[chr($i)] = '';
+            }
+            for ($i = 127; $i <= 159; $i++) {
+                $non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
+            }
+        }
+
+        static $iconv = null;
+        if ($iconv === null) $iconv = function_exists('iconv');
+
+        if ($iconv && !$force_php) {
+            // do the shortcut way
+            $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
+            return strtr($str, $non_sgml_chars);
+        }
+
+        $mState = 0; // cached expected number of octets after the current octet
+                     // until the beginning of the next UTF8 character sequence
+        $mUcs4  = 0; // cached Unicode character
+        $mBytes = 1; // cached expected number of octets in the current sequence
+
+        // original code involved an $out that was an array of Unicode
+        // codepoints. Instead of having to convert back into UTF-8, we've
+        // decided to directly append valid UTF-8 characters onto a string
+        // $out once they're done. $char accumulates raw bytes, while $mUcs4
+        // turns into the Unicode code point, so there's some redundancy.
+
+        $out = '';
+        $char = '';
+
+        $len = strlen($str);
+        for($i = 0; $i < $len; $i++) {
+            // square-bracket offset, as on the next line: the curly-brace
+            // form is deprecated in PHP 7.4 and a parse error in PHP 8
+            $in = ord($str[$i]);
+            $char .= $str[$i]; // append byte to char
+            if (0 == $mState) {
+                // When mState is zero we expect either a US-ASCII character
+                // or a multi-octet sequence.
+                if (0 == (0x80 & ($in))) {
+                    // US-ASCII, pass straight through.
+                    if (($in <= 31 || $in == 127) &&
+                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
+                    ) {
+                        // control characters, remove
+                    } else {
+                        $out .= $char;
+                    }
+                    // reset
+                    $char = '';
+                    $mBytes = 1;
+                } elseif (0xC0 == (0xE0 & ($in))) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+                } elseif (0xE0 == (0xF0 & ($in))) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+                } elseif (0xF0 == (0xF8 & ($in))) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+                } elseif (0xF8 == (0xFC & ($in))) {
+                    // First octet of 5 octet sequence.
+                    //
+                    // This is illegal because the encoded codepoint must be
+                    // either:
+                    // (a) not the shortest form or
+                    // (b) outside the Unicode range of 0-0x10FFFF.
+                    // Rather than trying to resynchronize, we will carry on
+                    // until the end of the sequence and let the later error
+                    // handling code catch it.
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+                } elseif (0xFC == (0xFE & ($in))) {
+                    // First octet of 6 octet sequence, see comments for 5
+                    // octet sequence.
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+                } else {
+                    // Current octet is neither in the US-ASCII range nor a
+                    // legal first octet of a multi-octet sequence.
+                    $mState = 0;
+                    $mUcs4 = 0;
+                    $mBytes = 1;
+                    $char = '';
+                }
+            } else {
+                // When mState is non-zero, we expect a continuation of the
+                // multi-octet sequence
+                if (0x80 == (0xC0 & ($in))) {
+                    // Legal continuation.
+                    $shift = ($mState - 1) * 6;
+                    $tmp = $in;
+                    $tmp = ($tmp & 0x0000003F) << $shift;
+                    $mUcs4 |= $tmp;
+
+                    if (0 == --$mState) {
+                        // End of the multi-octet sequence. mUcs4 now contains
+                        // the final Unicode codepoint to be output
+
+                        // Check for illegal sequences and codepoints.
+
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters = illegal
+                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)
+                        ) {
+
+                        } elseif (0xFEFF != $mUcs4 && // omit BOM
+                            !($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
+                        ) {
+                            $out .= $char;
+                        }
+                        // initialize UTF8 cache (reset)
+                        $mState = 0;
+                        $mUcs4 = 0;
+                        $mBytes = 1;
+                        $char = '';
+                    }
+                } else {
+                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
+                    // Incomplete multi-octet sequence.
+                    // used to result in complete fail, but we'll reset
+                    $mState = 0;
+                    $mUcs4 = 0;
+                    $mBytes = 1;
+                    $char ='';
+                }
+            }
+        }
+        return $out;
+    }
+
+    /**
+     * Translates a Unicode codepoint into its corresponding UTF-8 character.
+     * @static
+     * @note Based on Feyd's function at
+     * ,
+     * which is in public domain.
+     * @note While we're going to do code point parsing anyway, a good
+     * optimization would be to refuse to translate code points that
+     * are non-SGML characters. However, this could lead to duplication.
+     * @note This is very similar to the unichr function in
+     * maintenance/generate-entity-file.php (although this is superior,
+     * due to its sanity checks).
+ */ + + // +----------+----------+----------+----------+ + // | 33222222 | 22221111 | 111111 | | + // | 10987654 | 32109876 | 54321098 | 76543210 | bit + // +----------+----------+----------+----------+ + // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F + // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF + // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF + // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF + // +----------+----------+----------+----------+ + // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) + // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes + // +----------+----------+----------+----------+ + + function unichr($code) { + if($code > 1114111 or $code < 0 or + ($code >= 55296 and $code <= 57343) ) { + // bits are set outside the "valid" range as defined + // by UNICODE 4.1.0 + return ''; + } + + $x = $y = $z = $w = 0; + if ($code < 128) { + // regular ASCII character + $x = $code; + } else { + // set up bits for UTF-8 + $x = ($code & 63) | 128; + if ($code < 2048) { + $y = (($code & 2047) >> 6) | 192; + } else { + $y = (($code & 4032) >> 6) | 128; + if($code < 65536) { + $z = (($code >> 12) & 15) | 224; + } else { + $z = (($code >> 12) & 63) | 128; + $w = (($code >> 18) & 7) | 240; + } + } + } + // set up the actual character + $ret = ''; + if($w) $ret .= chr($w); + if($z) $ret .= chr($z); + if($y) $ret .= chr($y); + $ret .= chr($x); + + return $ret; + } + + /** + * Converts a string to UTF-8 based on configuration. 
+ * @static + */ + function convertToUTF8($str, $config, &$context) { + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + $encoding = $config->get('Core', 'Encoding'); + if ($encoding === 'utf-8') return $str; + if ($iconv && !$config->get('Test', 'ForceNoIconv')) { + return @iconv($encoding, 'utf-8//IGNORE', $str); + } elseif ($encoding === 'iso-8859-1') { + return @utf8_encode($str); + } + trigger_error('Encoding not supported', E_USER_ERROR); + } + + /** + * Converts a string from UTF-8 based on configuration. + * @static + * @note Currently, this is a lossy conversion, with unexpressable + * characters being omitted. + */ + function convertFromUTF8($str, $config, &$context) { + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + $encoding = $config->get('Core', 'Encoding'); + if ($encoding === 'utf-8') return $str; + if ($config->get('Core', 'EscapeNonASCIICharacters')) { + $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); + } + if ($iconv && !$config->get('Test', 'ForceNoIconv')) { + return @iconv('utf-8', $encoding . '//IGNORE', $str); + } elseif ($encoding === 'iso-8859-1') { + return @utf8_decode($str); + } + trigger_error('Encoding not supported', E_USER_ERROR); + } + + /** + * Lossless (character-wise) conversion of HTML to ASCII + * @static + * @param $str UTF-8 string to be converted to ASCII + * @returns ASCII encoded string with non-ASCII character entity-ized + * @warning Adapted from MediaWiki, claiming fair use: this is a common + * algorithm. If you disagree with this license fudgery, + * implement it yourself. + * @note Uses decimal numeric entities since they are best supported. + * @note This is a DUMB function: it has no concept of keeping + * character entities that the projected character encoding + * can allow. We could possibly implement a smart version + * but that would require it to also know which Unicode + * codepoints the charset supported (not an easy task). 
+ * @note Sort of with cleanUTF8() but it assumes that $str is + * well-formed UTF-8 + */ + function convertToASCIIDumbLossless($str) { + $bytesleft = 0; + $result = ''; + $working = 0; + $len = strlen($str); + for( $i = 0; $i < $len; $i++ ) { + $bytevalue = ord( $str[$i] ); + if( $bytevalue <= 0x7F ) { //0xxx xxxx + $result .= chr( $bytevalue ); + $bytesleft = 0; + } elseif( $bytevalue <= 0xBF ) { //10xx xxxx + $working = $working << 6; + $working += ($bytevalue & 0x3F); + $bytesleft--; + if( $bytesleft <= 0 ) { + $result .= "&#" . $working . ";"; + } + } elseif( $bytevalue <= 0xDF ) { //110x xxxx + $working = $bytevalue & 0x1F; + $bytesleft = 1; + } elseif( $bytevalue <= 0xEF ) { //1110 xxxx + $working = $bytevalue & 0x0F; + $bytesleft = 2; + } else { //1111 0xxx + $working = $bytevalue & 0x07; + $bytesleft = 3; + } + } + return $result; + } + + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/EntityLookup.php b/lib/htmlpurifier/HTMLPurifier/EntityLookup.php new file mode 100644 index 0000000000..f950cc2231 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/EntityLookup.php @@ -0,0 +1,46 @@ +table = unserialize(file_get_contents($file)); + } + + /** + * Retrieves sole instance of the object. + * @static + * @param Optional prototype of custom lookup table to overload with. 
+ */ + function instance($prototype = false) { + // no references, since PHP doesn't copy unless modified + static $instance = null; + if ($prototype) { + $instance = $prototype; + } elseif (!$instance) { + $instance = new HTMLPurifier_EntityLookup(); + $instance->setup(); + } + return $instance; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser b/lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser new file mode 100644 index 0000000000..f2b8b8f2db --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser @@ -0,0 +1 @@ +a:246:{s:4:"nbsp";s:2:"Â ";s:5:"iexcl";s:2:"Â¡";s:4:"cent";s:2:"Â¢";s:5:"pound";s:2:"Â£";s:6:"curren";s:2:"Â¤";s:3:"yen";s:2:"Â¥";s:6:"brvbar";s:2:"Â¦";s:4:"sect";s:2:"Â§";s:3:"uml";s:2:"Â¨";s:4:"copy";s:2:"Â©";s:4:"ordf";s:2:"Âª";s:5:"laquo";s:2:"Â«";s:3:"not";s:2:"Â¬";s:3:"shy";s:2:"Â";s:3:"reg";s:2:"Â®";s:4:"macr";s:2:"Â¯";s:3:"deg";s:2:"Â°";s:6:"plusmn";s:2:"Â±";s:5:"acute";s:2:"Â´";s:5:"micro";s:2:"Âµ";s:4:"para";s:2:"Â¶";s:6:"middot";s:2:"Â·";s:5:"cedil";s:2:"Â¸";s:4:"ordm";s:2:"Âº";s:5:"raquo";s:2:"Â»";s:6:"iquest";s:2:"Â¿";s:6:"Agrave";s:2:"Ã";s:6:"Aacute";s:2:"Ã";s:5:"Acirc";s:2:"Ã";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ã";s:5:"Aring";s:2:"Ã";s:5:"AElig";s:2:"Ã";s:6:"Ccedil";s:2:"Ã";s:6:"Egrave";s:2:"Ã";s:6:"Eacute";s:2:"Ã";s:5:"Ecirc";s:2:"Ã";s:4:"Euml";s:2:"Ã";s:6:"Igrave";s:2:"Ã";s:6:"Iacute";s:2:"Ã";s:5:"Icirc";s:2:"Ã";s:4:"Iuml";s:2:"Ã";s:3:"ETH";s:2:"Ã";s:6:"Ntilde";s:2:"Ã";s:6:"Ograve";s:2:"Ã";s:6:"Oacute";s:2:"Ã";s:5:"Ocirc";s:2:"Ã";s:6:"Otilde";s:2:"Ã";s:4:"Ouml";s:2:"Ã";s:5:"times";s:2:"Ã";s:6:"Oslash";s:2:"Ã";s:6:"Ugrave";s:2:"Ã";s:6:"Uacute";s:2:"Ã";s:5:"Ucirc";s:2:"Ã";s:4:"Uuml";s:2:"Ã";s:6:"Yacute";s:2:"Ã";s:5:"THORN";s:2:"Ã";s:5:"szlig";s:2:"Ã";s:6:"agrave";s:2:"Ã 
";s:6:"aacute";s:2:"Ã¡";s:5:"acirc";s:2:"Ã¢";s:6:"atilde";s:2:"Ã£";s:4:"auml";s:2:"Ã¤";s:5:"aring";s:2:"Ã¥";s:5:"aelig";s:2:"Ã¦";s:6:"ccedil";s:2:"Ã§";s:6:"egrave";s:2:"Ã¨";s:6:"eacute";s:2:"Ã©";s:5:"ecirc";s:2:"Ãª";s:4:"euml";s:2:"Ã«";s:6:"igrave";s:2:"Ã¬";s:6:"iacute";s:2:"Ã";s:5:"icirc";s:2:"Ã®";s:4:"iuml";s:2:"Ã¯";s:3:"eth";s:2:"Ã°";s:6:"ntilde";s:2:"Ã±";s:6:"ograve";s:2:"Ã²";s:6:"oacute";s:2:"Ã³";s:5:"ocirc";s:2:"Ã´";s:6:"otilde";s:2:"Ãµ";s:4:"ouml";s:2:"Ã¶";s:6:"divide";s:2:"Ã·";s:6:"oslash";s:2:"Ã¸";s:6:"ugrave";s:2:"Ã¹";s:6:"uacute";s:2:"Ãº";s:5:"ucirc";s:2:"Ã»";s:4:"uuml";s:2:"Ã¼";s:6:"yacute";s:2:"Ã½";s:5:"thorn";s:2:"Ã¾";s:4:"yuml";s:2:"Ã¿";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Å";s:5:"oelig";s:2:"Å";s:6:"Scaron";s:2:"Å ";s:6:"scaron";s:2:"Å¡";s:4:"Yuml";s:2:"Å¸";s:4:"circ";s:2:"Ë";s:5:"tilde";s:2:"Ë";s:4:"ensp";s:3:"â";s:4:"emsp";s:3:"â";s:6:"thinsp";s:3:"â";s:4:"zwnj";s:3:"â";s:3:"zwj";s:3:"â";s:3:"lrm";s:3:"â";s:3:"rlm";s:3:"â";s:5:"ndash";s:3:"â";s:5:"mdash";s:3:"â";s:5:"lsquo";s:3:"â";s:5:"rsquo";s:3:"â";s:5:"sbquo";s:3:"â";s:5:"ldquo";s:3:"â";s:5:"rdquo";s:3:"â";s:5:"bdquo";s:3:"â";s:6:"dagger";s:3:"â ";s:6:"Dagger";s:3:"â¡";s:6:"permil";s:3:"â°";s:6:"lsaquo";s:3:"â¹";s:6:"rsaquo";s:3:"âº";s:4:"euro";s:3:"â¬";s:4:"fnof";s:2:"Æ";s:5:"Alpha";s:2:"Î";s:4:"Beta";s:2:"Î";s:5:"Gamma";s:2:"Î";s:5:"Delta";s:2:"Î";s:7:"Epsilon";s:2:"Î";s:4:"Zeta";s:2:"Î";s:3:"Eta";s:2:"Î";s:5:"Theta";s:2:"Î";s:4:"Iota";s:2:"Î";s:5:"Kappa";s:2:"Î";s:6:"Lambda";s:2:"Î";s:2:"Mu";s:2:"Î";s:2:"Nu";s:2:"Î";s:2:"Xi";s:2:"Î";s:7:"Omicron";s:2:"Î";s:2:"Pi";s:2:"Î 
";s:3:"Rho";s:2:"Î¡";s:5:"Sigma";s:2:"Î£";s:3:"Tau";s:2:"Î¤";s:7:"Upsilon";s:2:"Î¥";s:3:"Phi";s:2:"Î¦";s:3:"Chi";s:2:"Î§";s:3:"Psi";s:2:"Î¨";s:5:"Omega";s:2:"Î©";s:5:"alpha";s:2:"Î±";s:4:"beta";s:2:"Î²";s:5:"gamma";s:2:"Î³";s:5:"delta";s:2:"Î´";s:7:"epsilon";s:2:"Îµ";s:4:"zeta";s:2:"Î¶";s:3:"eta";s:2:"Î·";s:5:"theta";s:2:"Î¸";s:4:"iota";s:2:"Î¹";s:5:"kappa";s:2:"Îº";s:6:"lambda";s:2:"Î»";s:2:"mu";s:2:"Î¼";s:2:"nu";s:2:"Î½";s:2:"xi";s:2:"Î¾";s:7:"omicron";s:2:"Î¿";s:2:"pi";s:2:"Ï";s:3:"rho";s:2:"Ï";s:6:"sigmaf";s:2:"Ï";s:5:"sigma";s:2:"Ï";s:3:"tau";s:2:"Ï";s:7:"upsilon";s:2:"Ï";s:3:"phi";s:2:"Ï";s:3:"chi";s:2:"Ï";s:3:"psi";s:2:"Ï";s:5:"omega";s:2:"Ï";s:8:"thetasym";s:2:"Ï";s:5:"upsih";s:2:"Ï";s:3:"piv";s:2:"Ï";s:4:"bull";s:3:"â¢";s:6:"hellip";s:3:"â¦";s:5:"prime";s:3:"â²";s:5:"Prime";s:3:"â³";s:5:"oline";s:3:"â¾";s:5:"frasl";s:3:"â";s:6:"weierp";s:3:"â";s:5:"image";s:3:"â";s:4:"real";s:3:"â";s:5:"trade";s:3:"â¢";s:7:"alefsym";s:3:"âµ";s:4:"larr";s:3:"â";s:4:"uarr";s:3:"â";s:4:"rarr";s:3:"â";s:4:"darr";s:3:"â";s:4:"harr";s:3:"â";s:5:"crarr";s:3:"âµ";s:4:"lArr";s:3:"â";s:4:"uArr";s:3:"â";s:4:"rArr";s:3:"â";s:4:"dArr";s:3:"â";s:4:"hArr";s:3:"â";s:6:"forall";s:3:"â";s:4:"part";s:3:"â";s:5:"exist";s:3:"â";s:5:"empty";s:3:"â";s:5:"nabla";s:3:"â";s:4:"isin";s:3:"â";s:5:"notin";s:3:"â";s:2:"ni";s:3:"â";s:4:"prod";s:3:"â";s:3:"sum";s:3:"â";s:5:"minus";s:3:"â";s:6:"lowast";s:3:"â";s:5:"radic";s:3:"â";s:4:"prop";s:3:"â";s:5:"infin";s:3:"â";s:3:"ang";s:3:"â ";s:3:"and";s:3:"â§";s:2:"or";s:3:"â¨";s:3:"cap";s:3:"â©";s:3:"cup";s:3:"âª";s:3:"int";s:3:"â«";s:3:"sim";s:3:"â¼";s:4:"cong";s:3:"â";s:5:"asymp";s:3:"â";s:2:"ne";s:3:"â 
";s:5:"equiv";s:3:"â¡";s:2:"le";s:3:"â¤";s:2:"ge";s:3:"â¥";s:3:"sub";s:3:"â";s:3:"sup";s:3:"â";s:4:"nsub";s:3:"â";s:4:"sube";s:3:"â";s:4:"supe";s:3:"â";s:5:"oplus";s:3:"â";s:6:"otimes";s:3:"â";s:4:"perp";s:3:"â¥";s:4:"sdot";s:3:"â";s:5:"lceil";s:3:"â";s:5:"rceil";s:3:"â";s:6:"lfloor";s:3:"â";s:6:"rfloor";s:3:"â";s:4:"lang";s:3:"â©";s:4:"rang";s:3:"âª";s:3:"loz";s:3:"â";s:6:"spades";s:3:"â ";s:5:"clubs";s:3:"â£";s:6:"hearts";s:3:"â¥";s:5:"diams";s:3:"â¦";} \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/EntityParser.php b/lib/htmlpurifier/HTMLPurifier/EntityParser.php new file mode 100644 index 0000000000..069c5ce17e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/EntityParser.php @@ -0,0 +1,158 @@ + '"', + 38 => '&', + 39 => "'", + 60 => '<', + 62 => '>' + ); + + /** + * Stripped entity names to decimal conversion table for special entities. + * @protected + */ + var $_special_ent2dec = + array( + 'quot' => 34, + 'amp' => 38, + 'lt' => 60, + 'gt' => 62 + ); + + /** + * Substitutes non-special entities with their parsed equivalents. Since + * running this whenever you have parsed character is t3h 5uck, we run + * it before everything else. + * + * @protected + * @param $string String to have non-special entities parsed. + * @returns Parsed string. + */ + function substituteNonSpecialEntities($string) { + // it will try to detect missing semicolons, but don't rely on it + return preg_replace_callback( + $this->_substituteEntitiesRegex, + array($this, 'nonSpecialEntityCallback'), + $string + ); + } + + /** + * Callback function for substituteNonSpecialEntities() that does the work. + * + * @warning Though this is public in order to let the callback happen, + * calling it directly is not recommended. + * @param $matches PCRE matches array, with 0 the entire match, and + * either index 1, 2 or 3 set with a hex value, dec value, + * or string (respectively). + * @returns Replacement string. 
+ */ + + function nonSpecialEntityCallback($matches) { + // replaces all but big five + $entity = $matches[0]; + $is_num = (@$matches[0][1] === '#'); + if ($is_num) { + $is_hex = (@$entity[2] === 'x'); + $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; + + // abort for special characters + if (isset($this->_special_dec2str[$code])) return $entity; + + return HTMLPurifier_Encoder::unichr($code); + } else { + if (isset($this->_special_ent2dec[$matches[3]])) return $entity; + if (!$this->_entity_lookup) { + require_once 'HTMLPurifier/EntityLookup.php'; + $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); + } + if (isset($this->_entity_lookup->table[$matches[3]])) { + return $this->_entity_lookup->table[$matches[3]]; + } else { + return $entity; + } + } + } + + /** + * Substitutes only special entities with their parsed equivalents. + * + * @notice We try to avoid calling this function because otherwise, it + * would have to be called a lot (for every parsed section). + * + * @protected + * @param $string String to have non-special entities parsed. + * @returns Parsed string. + */ + function substituteSpecialEntities($string) { + return preg_replace_callback( + $this->_substituteEntitiesRegex, + array($this, 'specialEntityCallback'), + $string); + } + + /** + * Callback function for substituteSpecialEntities() that does the work. + * + * This callback has same syntax as nonSpecialEntityCallback(). + * + * @warning Though this is public in order to let the callback happen, + * calling it directly is not recommended. + * @param $matches PCRE-style matches array, with 0 the entire match, and + * either index 1, 2 or 3 set with a hex value, dec value, + * or string (respectively). + * @returns Replacement string. + */ + function specialEntityCallback($matches) { + $entity = $matches[0]; + $is_num = (@$matches[0][1] === '#'); + if ($is_num) { + $is_hex = (@$entity[2] === 'x'); + $int = $is_hex ? 
hexdec($matches[1]) : (int) $matches[2]; + return isset($this->_special_dec2str[$int]) ? + $this->_special_dec2str[$int] : + $entity; + } else { + return isset($this->_special_ent2dec[$matches[3]]) ? + $this->_special_ent2dec[$matches[3]] : + $entity; + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Error.php b/lib/htmlpurifier/HTMLPurifier/Error.php new file mode 100644 index 0000000000..adc81dc56d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Error.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Filter.php b/lib/htmlpurifier/HTMLPurifier/Filter.php new file mode 100644 index 0000000000..94c5ae7bb2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Filter.php @@ -0,0 +1,39 @@ +preFilter, + * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter, + * 1->postFilter. + */ + +class HTMLPurifier_Filter +{ + + /** + * Name of the filter for identification purposes + */ + var $name; + + /** + * Pre-processor function, handles HTML before HTML Purifier + */ + function preFilter($html, $config, &$context) {} + + /** + * Post-processor function, handles HTML after HTML Purifier + */ + function postFilter($html, $config, &$context) {} + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php new file mode 100644 index 0000000000..433f17cf47 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php @@ -0,0 +1,34 @@ +]+>.+?'. + 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?#s'; + $pre_replace = '\1'; + return preg_replace($pre_regex, $pre_replace, $html); + } + + function postFilter($html, $config, &$context) { + $post_regex = '#([A-Za-z0-9\-_]+)#'; + $post_replace = '

'; + return preg_replace($post_regex, $post_replace, $html); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Generator.php b/lib/htmlpurifier/HTMLPurifier/Generator.php new file mode 100644 index 0000000000..b6a9aa24d7 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Generator.php @@ -0,0 +1,158 @@ +Determines whether or not to run Tidy on the final output for pretty '. + 'formatting reasons, such as indentation and wrap.

This can greatly '. + 'improve readability for editors who are hand-editing the HTML, but is '. + 'by no means necessary as HTML Purifier has already fixed all major '. + 'errors the HTML may have had. Tidy is a non-default extension, and this directive '. + 'will silently fail if Tidy is not available.

If you are looking to make '. + 'the overall look of your page\'s source better, I recommend running Tidy '. + 'on the entire page rather than just user-content (after all, the '. + 'indentation relative to the containing blocks will be incorrect).

This '. + 'directive was available since 1.1.1.

' +); + +/** + * Generates HTML from tokens. + */ +class HTMLPurifier_Generator +{ + + /** + * Bool cache of %Core.CleanUTF8DuringGeneration + * @private + */ + var $_clean_utf8 = false; + + /** + * Bool cache of %Core.XHTML + * @private + */ + var $_xhtml = true; + + /** + * Generates HTML from an array of tokens. + * @param $tokens Array of HTMLPurifier_Token + * @param $config HTMLPurifier_Config object + * @return Generated HTML + */ + function generateFromTokens($tokens, $config, &$context) { + $html = ''; + if (!$config) $config = HTMLPurifier_Config::createDefault(); + $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration'); + $this->_xhtml = $config->get('Core', 'XHTML'); + if (!$tokens) return ''; + foreach ($tokens as $token) { + $html .= $this->generateFromToken($token); + } + if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) { + + $tidy_options = array( + 'indent'=> true, + 'output-xhtml' => $this->_xhtml, + 'show-body-only' => true, + 'indent-spaces' => 2, + 'wrap' => 68, + ); + if (version_compare(PHP_VERSION, '5', '<')) { + tidy_set_encoding('utf8'); + foreach ($tidy_options as $key => $value) { + tidy_setopt($key, $value); + } + tidy_parse_string($html); + tidy_clean_repair(); + $html = tidy_get_output(); + } else { + $tidy = new Tidy; + $tidy->parseString($html, $tidy_options, 'utf8'); + $tidy->cleanRepair(); + $html = (string) $tidy; + } + } + return $html; + } + + /** + * Generates HTML from a single token. + * @param $token HTMLPurifier_Token object. + * @return Generated HTML + */ + function generateFromToken($token) { + if (!isset($token->type)) return ''; + if ($token->type == 'start') { + $attr = $this->generateAttributes($token->attr); + return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; + + } elseif ($token->type == 'end') { + return 'name . '>'; + + } elseif ($token->type == 'empty') { + $attr = $this->generateAttributes($token->attr); + return '<' . $token->name . ($attr ? ' ' : '') . $attr . 
+ ( $this->_xhtml ? ' /': '' ) + . '>'; + + } elseif ($token->type == 'text') { + return $this->escape($token->data); + + } else { + return ''; + + } + } + + /** + * Generates attribute declarations from attribute array. + * @param $assoc_array_of_attributes Attribute array + * @return Generate HTML fragment for insertion. + */ + function generateAttributes($assoc_array_of_attributes) { + $html = ''; + foreach ($assoc_array_of_attributes as $key => $value) { + if (!$this->_xhtml) { + // remove namespaced attributes + if (strpos($key, ':') !== false) continue; + // also needed: check for attribute minimization + } + $html .= $key.'="'.$this->escape($value).'" '; + } + return rtrim($html); + } + + /** + * Escapes raw text data. + * @param $string String data to escape for HTML. + * @return String escaped data. + */ + function escape($string) { + if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string); + return htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php new file mode 100644 index 0000000000..3af445ceb0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php @@ -0,0 +1,281 @@ +<blockquote>Foo</blockquote> '. + 'would become <blockquote><p>Foo</p></blockquote>. The '. + '<p> tags can be replaced '. + 'with whatever you desire, as long as it is a block level element. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Parent', 'div', 'string', + 'String name of element that HTML fragment passed to library will be '. + 'inserted in. An interesting variation would be using span as the '. + 'parent element, meaning that only inline tags would be allowed. '. + 'This directive has been available since 1.3.0.' 
+); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedElements', null, 'lookup/null', + 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. + 'can overload it with your own list of tags to allow. Note that this '. + 'method is subtractive: it does its job by taking away from HTML Purifier '. + 'usual feature set, so you cannot add a tag that HTML Purifier never '. + 'supported in the first place (like embed, form or head). If you change this, you '. + 'probably also want to change %HTML.AllowedAttributes. '. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', + 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. + 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. + '(style, id, class, dir, lang, xml:lang).'. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. For '. + 'example, %HTML.EnableAttrID will take precedence over *.id in this '. + 'directive. You must set that directive to true before you can use '. + 'IDs at all. This directive has been available since 1.3.0.' +); + +/** + * Definition of the purified HTML that describes allowed children, + * attributes, and many other things. + * + * Conventions: + * + * All member variables that are prefixed with info + * (including the main $info array) are used by HTML Purifier internals + * and should not be directly edited when customizing the HTMLDefinition. + * They can usually be set via configuration directives or custom + * modules. + * + * On the other hand, member variables without the info prefix are used + * internally by the HTMLDefinition and MUST NOT be used by other HTML + * Purifier internals. 
Many of them, however, are public, and may be + * edited by userspace code to tweak the behavior of HTMLDefinition. + * + * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this + * rule: in the interest of comprehensiveness, it will sniff everything. + */ +class HTMLPurifier_HTMLDefinition +{ + + /** FULLY-PUBLIC VARIABLES */ + + /** + * Associative array of element names to HTMLPurifier_ElementDef + * @public + */ + var $info = array(); + + /** + * Associative array of global attribute name to attribute definition. + * @public + */ + var $info_global_attr = array(); + + /** + * String name of parent element HTML will be going into. + * @public + */ + var $info_parent = 'div'; + + /** + * Definition for parent element, allows parent element to be a + * tag that's not allowed inside the HTML fragment. + * @public + */ + var $info_parent_def; + + /** + * String name of element used to wrap inline elements in block context + * @note This is rarely used except for BLOCKQUOTEs in strict mode + * @public + */ + var $info_block_wrapper = 'p'; + + /** + * Associative array of deprecated tag name to HTMLPurifier_TagTransform + * @public + */ + var $info_tag_transform = array(); + + /** + * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. + * @public + */ + var $info_attr_transform_pre = array(); + + /** + * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. + * @public + */ + var $info_attr_transform_post = array(); + + /** + * Nested lookup array of content set name (Block, Inline) to + * element name to whether or not it belongs in that content set. + * @public + */ + var $info_content_sets = array(); + + + + /** PUBLIC BUT INTERNAL VARIABLES */ + + var $setup = false; /**< Has setup() been called yet? */ + var $config; /**< Temporary instance of HTMLPurifier_Config */ + + var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ + + /** + * Performs low-cost, preliminary initialization. 
+ * @param $config Instance of HTMLPurifier_Config + */ + function HTMLPurifier_HTMLDefinition(&$config) { + $this->config =& $config; + $this->manager = new HTMLPurifier_HTMLModuleManager(); + } + + /** + * Processes internals into form usable by HTMLPurifier internals. + * Modifying the definition after calling this function should not + * be done. + */ + function setup() { + + // multiple call guard + if ($this->setup) {return;} else {$this->setup = true;} + + $this->processModules(); + $this->setupConfigStuff(); + + unset($this->config); + unset($this->manager); + + } + + /** + * Extract out the information from the manager + */ + function processModules() { + + $this->manager->setup($this->config); + + foreach ($this->manager->activeModules as $module) { + foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; + foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; + foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; + } + + $this->info = $this->manager->getElements($this->config); + $this->info_content_sets = $this->manager->contentSets->lookup; + + } + + /** + * Sets up stuff based on config. We need a better way of doing this. 
+ */ + function setupConfigStuff() { + + $block_wrapper = $this->config->get('HTML', 'BlockWrapper'); + if (isset($this->info_content_sets['Block'][$block_wrapper])) { + $this->info_block_wrapper = $block_wrapper; + } else { + trigger_error('Cannot use non-block element as block wrapper.', + E_USER_ERROR); + } + + $parent = $this->config->get('HTML', 'Parent'); + $def = $this->manager->getElement($parent, $this->config); + if ($def) { + $this->info_parent = $parent; + $this->info_parent_def = $def; + } else { + trigger_error('Cannot use unrecognized element as parent.', + E_USER_ERROR); + $this->info_parent_def = $this->manager->getElement( + $this->info_parent, $this->config); + } + + // support template text + $support = "(for information on implementing this, see the ". + "support forums) "; + + // setup allowed elements, SubtractiveWhitelist module + $allowed_elements = $this->config->get('HTML', 'AllowedElements'); + if (is_array($allowed_elements)) { + foreach ($this->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($this->info[$name]); + unset($allowed_elements[$name]); + } + // emit errors + foreach ($allowed_elements as $element => $d) { + trigger_error("Element '$element' is not supported $support", E_USER_WARNING); + } + } + + $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); + $allowed_attributes_mutable = $allowed_attributes; // by copy! 
+ if (is_array($allowed_attributes)) { + foreach ($this->info_global_attr as $attr_key => $info) { + if (!isset($allowed_attributes["*.$attr_key"])) { + unset($this->info_global_attr[$attr_key]); + } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) { + unset($allowed_attributes_mutable["*.$attr_key"]); + } + } + foreach ($this->info as $tag => $info) { + foreach ($info->attr as $attr => $attr_info) { + if (!isset($allowed_attributes["$tag.$attr"]) && + !isset($allowed_attributes["*.$attr"])) { + unset($this->info[$tag]->attr[$attr]); + } else { + if (isset($allowed_attributes_mutable["$tag.$attr"])) { + unset($allowed_attributes_mutable["$tag.$attr"]); + } elseif (isset($allowed_attributes_mutable["*.$attr"])) { + unset($allowed_attributes_mutable["*.$attr"]); + } + } + } + } + // emit errors + foreach ($allowed_attributes_mutable as $elattr => $d) { + list($element, $attribute) = explode('.', $elattr); + if ($element == '*') { + trigger_error("Global attribute '$attribute' is not ". + "supported in any elements $support", + E_USER_WARNING); + } else { + trigger_error("Attribute '$attribute' in element '$element' not supported $support", + E_USER_WARNING); + } + } + } + + } + + +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php new file mode 100644 index 0000000000..930b605d11 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php @@ -0,0 +1,125 @@ +info, since the object's data is only info, + * with extra behavior associated with it. + * @public + */ + var $attr_collections = array(); + + /** + * Associative array of deprecated tag name to HTMLPurifier_TagTransform + * @public + */ + var $info_tag_transform = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed before validation. + * @public + */ + var $info_attr_transform_pre = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed after validation. 
+ * @public + */ + var $info_attr_transform_post = array(); + + /** + * Boolean flag that indicates whether or not getChildDef is implemented. + * For optimization reasons: may save a call to a function. Be sure + * to set it if you do implement getChildDef(), otherwise it will have + * no effect! + * @public + */ + var $defines_child_def = false; + + /** + * Retrieves a proper HTMLPurifier_ChildDef subclass based on + * content_model and content_model_type member variables of + * the HTMLPurifier_ElementDef class. There is a similar function + * in HTMLPurifier_HTMLDefinition. + * @param $def HTMLPurifier_ElementDef instance + * @return HTMLPurifier_ChildDef subclass + * @public + */ + function getChildDef($def) {return false;} + + /** + * Hook method that lets module perform arbitrary operations on + * HTMLPurifier_HTMLDefinition before the module gets processed. + * @param $definition Reference to HTMLDefinition being setup + */ + function preProcess(&$definition) {} + + /** + * Hook method that lets module perform arbitrary operations + * on HTMLPurifier_HTMLDefinition after the module gets processed. + * @param $definition Reference to HTMLDefinition being setup + */ + function postProcess(&$definition) {} + + /** + * Hook method that is called when a module gets registered to + * the definition. 
+ * @param $definition Reference to HTMLDefinition being setup + */ + function setup(&$definition) {} + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php new file mode 100644 index 0000000000..17e5e987fd --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php @@ -0,0 +1,43 @@ + 'bdo'); + var $attr_collections = array( + 'I18N' => array('dir' => false) + ); + + function HTMLPurifier_HTMLModule_Bdo() { + $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false); + $this->attr_collections['I18N']['dir'] = $dir; + $this->info['bdo'] = new HTMLPurifier_ElementDef(); + $this->info['bdo']->attr = array( + 0 => array('Core', 'Lang'), + 'dir' => $dir, // required + // The Abstract Module specification has the attribute + // inclusions wrong for bdo: bdo allows + // xml:lang too (and we'll toss in lang for good measure, + // though it is not allowed for XHTML 1.1, this will + // be managed with a global attribute transform) + ); + $this->info['bdo']->content_model = '#PCDATA | Inline'; + $this->info['bdo']->content_model_type = 'optional'; + // provides fallback behavior if dir's missing (dir is required) + $this->info['bdo']->attr_transform_post['required-dir'] = + new HTMLPurifier_AttrTransform_BdoDir(); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php new file mode 100644 index 0000000000..8f17c2f0a3 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -0,0 +1,31 @@ + array( + 0 => array('Style'), + // 'xml:space' => false, + 'class' => 'NMTOKENS', + 'id' => 'ID', + 'title' => 'CDATA', + ), + 'Lang' => array( + 'xml:lang' => false, // see constructor + ), + 'I18N' => array( + 0 => array('Lang'), // proprietary, for xml:lang/lang + ), + 'Common' => array( + 0 => array('Core', 'I18N') + ) + ); + 
+ function HTMLPurifier_HTMLModule_CommonAttributes() { + $this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang(); + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php new file mode 100644 index 0000000000..6a415906e6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php @@ -0,0 +1,46 @@ + 'del | ins'); + + function HTMLPurifier_HTMLModule_Edit() { + foreach ($this->elements as $element) { + $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->attr = array( + 0 => array('Common'), + 'cite' => 'URI', + // 'datetime' => 'Datetime' // Datetime not implemented + ); + // Inline context ! Block context (exclamation mark is + // separator, see getChildDef for parsing) + $this->info[$element]->content_model = + '#PCDATA | Inline ! #PCDATA | Flow'; + // HTML 4.01 specifies that ins/del must not contain block + // elements when used in an inline context, chameleon is + // a complicated workaround to acheive this effect + $this->info[$element]->content_model_type = 'chameleon'; + } + } + + var $defines_child_def = true; + function getChildDef($def) { + if ($def->content_model_type != 'chameleon') return false; + $value = explode('!', $def->content_model); + return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php new file mode 100644 index 0000000000..e285e8ba1f --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php @@ -0,0 +1,37 @@ + 'a'); + + function HTMLPurifier_HTMLModule_Hypertext() { + $this->info['a'] = new HTMLPurifier_ElementDef(); + $this->info['a']->attr = array( + 0 => array('Common'), + // 'accesskey' => 'Character', + // 'charset' => 'Charset', + 'href' => 'URI', + //'hreflang' => 'LanguageCode', + 
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), + 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), + //'tabindex' => 'Number', + //'type' => 'ContentType', + ); + $this->info['a']->content_model = '#PCDATA | Inline'; + $this->info['a']->content_model_type = 'optional'; + $this->info['a']->excludes = array('a' => true); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php new file mode 100644 index 0000000000..3852836de7 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php @@ -0,0 +1,38 @@ + 'img'); + + function HTMLPurifier_HTMLModule_Image() { + $this->info['img'] = new HTMLPurifier_ElementDef(); + $this->info['img']->attr = array( + 0 => array('Common'), + 'alt' => 'Text', + 'height' => 'Length', + 'longdesc' => 'URI', + 'src' => new HTMLPurifier_AttrDef_URI(true), // embedded + 'width' => 'Length' + ); + $this->info['img']->content_model_type = 'empty'; + $this->info['img']->attr_transform_post[] = + new HTMLPurifier_AttrTransform_ImgRequired(); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php new file mode 100644 index 0000000000..a0613a2f7e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php @@ -0,0 +1,60 @@ +elements as $name) { + $this->info[$name] = new HTMLPurifier_ElementDef(); + // for u, s, strike, as more elements get added, add + // conditionals as necessary + $this->info[$name]->content_model = 'Inline | #PCDATA'; + $this->info[$name]->content_model_type = 'optional'; + $this->info[$name]->attr[0] = array('Common'); + } + + // setup modifications to old elements + foreach ($this->non_standalone_elements as $name) { + $this->info[$name] = new HTMLPurifier_ElementDef(); + $this->info[$name]->standalone = false; + } + + $this->info['li']->attr['value'] = new 
HTMLPurifier_AttrDef_Integer(); + $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + + $this->info['address']->content_model = 'Inline | #PCDATA | p'; + $this->info['address']->content_model_type = 'optional'; + $this->info['address']->child = false; + + $this->info['blockquote']->content_model = 'Flow | #PCDATA'; + $this->info['blockquote']->content_model_type = 'optional'; + $this->info['blockquote']->child = false; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php new file mode 100644 index 0000000000..c74982df4e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php @@ -0,0 +1,46 @@ + 'dl | ol | ul', 'Flow' => 'List'); + + function HTMLPurifier_HTMLModule_List() { + foreach ($this->elements as $element) { + $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->attr = array(0 => array('Common')); + if ($element == 'li' || $element == 'dd') { + $this->info[$element]->content_model = '#PCDATA | Flow'; + $this->info[$element]->content_model_type = 'optional'; + } elseif ($element == 'ol' || $element == 'ul') { + $this->info[$element]->content_model = 'li'; + $this->info[$element]->content_model_type = 'required'; + } + } + $this->info['dt']->content_model = '#PCDATA | Inline'; + $this->info['dt']->content_model_type = 'optional'; + $this->info['dl']->content_model = 'dt | dd'; + $this->info['dl']->content_model_type = 'required'; + // this could be a LOT more robust + $this->info['li']->auto_close = array('li' => true); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php new file mode 100644 index 0000000000..42d9c11e46 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php @@ -0,0 +1,41 @@ + 'hr', + 'Inline' => 'b | big | i | small | sub | sup | tt' + 
); + + function HTMLPurifier_HTMLModule_Presentation() { + foreach ($this->elements as $element) { + $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->attr = array(0 => array('Common')); + if ($element == 'hr') { + $this->info[$element]->content_model_type = 'empty'; + } else { + $this->info[$element]->content_model = '#PCDATA | Inline'; + $this->info[$element]->content_model_type = 'optional'; + } + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php new file mode 100644 index 0000000000..5ee5d1cf65 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php @@ -0,0 +1,27 @@ + array('style' => false), // see constructor + 'Core' => array(0 => array('Style')) + ); + + function HTMLPurifier_HTMLModule_StyleAttribute() { + $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS(); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php new file mode 100644 index 0000000000..ea41f5b103 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php @@ -0,0 +1,88 @@ + 'table'); + + function HTMLPurifier_HTMLModule_Tables() { + foreach ($this->elements as $e) { + $this->info[$e] = new HTMLPurifier_ElementDef(); + $this->info[$e]->attr = array(0 => array('Common')); + $attr =& $this->info[$e]->attr; + if ($e == 'caption') continue; + if ($e == 'table'){ + $attr['border'] = 'Pixels'; + $attr['cellpadding'] = 'Length'; + $attr['cellspacing'] = 'Length'; + $attr['frame'] = new HTMLPurifier_AttrDef_Enum(array( + 'void', 'above', 'below', 'hsides', 'lhs', 'rhs', + 'vsides', 'box', 'border' + ), false); + $attr['rules'] = new HTMLPurifier_AttrDef_Enum(array( + 'none', 'groups', 'rows', 'cols', 'all' + ), false); + $attr['summary'] = 'Text'; + $attr['width'] = 'Length'; + 
continue; + } + if ($e == 'col' || $e == 'colgroup') { + $attr['span'] = 'Number'; + $attr['width'] = 'MultiLength'; + } + if ($e == 'td' || $e == 'th') { + $attr['abbr'] = 'Text'; + $attr['colspan'] = 'Number'; + $attr['rowspan'] = 'Number'; + } + $attr['align'] = new HTMLPurifier_AttrDef_Enum(array( + 'left', 'center', 'right', 'justify', 'char' + ), false); + $attr['valign'] = new HTMLPurifier_AttrDef_Enum(array( + 'top', 'middle', 'bottom', 'baseline' + ), false); + $attr['charoff'] = 'Length'; + } + $this->info['caption']->content_model = '#PCDATA | Inline'; + $this->info['caption']->content_model_type = 'optional'; + + // Is done directly because it doesn't leverage substitution + // mechanisms. True model is: + // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))' + $this->info['table']->child = new HTMLPurifier_ChildDef_Table(); + + $this->info['td']->content_model = + $this->info['th']->content_model = '#PCDATA | Flow'; + $this->info['td']->content_model_type = + $this->info['th']->content_model_type = 'optional'; + + $this->info['tr']->content_model = 'td | th'; + $this->info['tr']->content_model_type = 'required'; + + $this->info['col']->content_model_type = 'empty'; + + $this->info['colgroup']->content_model = 'col'; + $this->info['colgroup']->content_model_type = 'optional'; + + $this->info['tbody']->content_model = + $this->info['thead']->content_model = + $this->info['tfoot']->content_model = 'tr'; + $this->info['tbody']->content_model_type = + $this->info['thead']->content_model_type = + $this->info['tfoot']->content_model_type = 'required'; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php new file mode 100644 index 0000000000..bac05986c6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php @@ -0,0 +1,78 @@ + 'h1 | h2 | h3 | h4 | h5 | h6', + 'Block' => 'address | blockquote | div | p | pre | nolink | 
tex | algebra', //moodle modification + 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var', + 'Flow' => 'Heading | Block | Inline' + ); + + function HTMLPurifier_HTMLModule_Text() { + foreach ($this->elements as $element) { + $this->info[$element] = new HTMLPurifier_ElementDef(); + // attributes + if ($element == 'br') { + $this->info[$element]->attr = array(0 => array('Core')); + } elseif ($element == 'blockquote' || $element == 'q') { + $this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI'); + } else { + $this->info[$element]->attr = array(0 => array('Common')); + } + // content models + if ($element == 'br') { + $this->info[$element]->content_model_type = 'empty'; + } elseif ($element == 'blockquote') { + $this->info[$element]->content_model = 'Heading | Block | List'; + $this->info[$element]->content_model_type = 'optional'; + } elseif ($element == 'div') { + $this->info[$element]->content_model = '#PCDATA | Flow'; + $this->info[$element]->content_model_type = 'optional'; + } else { + $this->info[$element]->content_model = '#PCDATA | Inline'; + $this->info[$element]->content_model_type = 'optional'; + } + } + // SGML permits exclusions for all descendants, but this is + // not possible with DTDs or XML Schemas. W3C has elected to + // use complicated compositions of content_models to simulate + // exclusion for children, but we go the simpler, SGML-style + // route of flat-out exclusions. Note that the Abstract Module + // is blithely unaware of such distinctions. 
+ $this->info['pre']->excludes = array_flip(array( + 'img', 'big', 'small', + 'object', 'applet', 'font', 'basefont' // generally not allowed + )); + $this->info['p']->auto_close = array_flip(array( + 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt', + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre', + 'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification + )); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php new file mode 100644 index 0000000000..cdbe3733f2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php @@ -0,0 +1,108 @@ + false, + 'menu' => false, + 'dir' => false, + 'center'=> false + ); + + var $attr_collections = array( + 'Lang' => array( + 'lang' => false // placeholder + ) + ); + + var $info_attr_transform_post = array( + 'lang' => false // placeholder + ); + + function HTMLPurifier_HTMLModule_TransformToStrict() { + + // deprecated tag transforms + $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); + $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); + $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); + $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); + + foreach ($this->elements as $name) { + $this->info[$name] = new HTMLPurifier_ElementDef(); + $this->info[$name]->standalone = false; + } + + // deprecated attribute transforms + $this->info['h1']->attr_transform_pre['align'] = + $this->info['h2']->attr_transform_pre['align'] = + $this->info['h3']->attr_transform_pre['align'] = + $this->info['h4']->attr_transform_pre['align'] = + $this->info['h5']->attr_transform_pre['align'] = + $this->info['h6']->attr_transform_pre['align'] = + $this->info['p'] ->attr_transform_pre['align'] = + new HTMLPurifier_AttrTransform_TextAlign(); + + // xml:lang <=> lang 
mirroring, implement in TransformToStrict, + // this is overridden in TransformToXHTML11 + $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang(); + $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang(); + + // this should not be applied to XHTML 1.0 Transitional, ONLY + // XHTML 1.0 Strict. We may need three classes + $this->info['blockquote']->content_model_type = 'strictblockquote'; + $this->info['blockquote']->child = false; // recalculate please! + + $this->info['table']->attr_transform_pre['bgcolor'] = + $this->info['tr']->attr_transform_pre['bgcolor'] = + $this->info['td']->attr_transform_pre['bgcolor'] = + $this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor(); + + $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border(); + + $this->info['img']->attr_transform_pre['name'] = + $this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name(); + + $this->info['td']->attr_transform_pre['width'] = + $this->info['th']->attr_transform_pre['width'] = + $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width'); + + $this->info['td']->attr_transform_pre['height'] = + $this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height'); + + } + + var $defines_child_def = true; + function getChildDef($def) { + if ($def->content_model_type != 'strictblockquote') return false; + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php new file mode 100644 index 0000000000..0915f5b6e5 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php @@ -0,0 +1,30 @@ + array( + 'lang' => false // remove it + ) + ); + + var $info_attr_transform_post = array( + 
'lang' => false // remove it + ); + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php new file mode 100644 index 0000000000..e0090472ca --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php @@ -0,0 +1,558 @@ +attrTypes = new HTMLPurifier_AttrTypes(); + + if (!$blank) $this->initialize(); + + } + + function initialize() { + $this->initialized = true; + + // load default modules to the recognized modules list (not active) + $modules = array( + // define + 'CommonAttributes', + 'Text', 'Hypertext', 'List', 'Presentation', + 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', + // define-redefine + 'Legacy', + // redefine + 'TransformToStrict', 'TransformToXHTML11' + ); + foreach ($modules as $module) { + $this->addModule($module); + } + + // Safe modules for supported doctypes. These are included + // in the valid and active module lists by default + $this->collections['Safe'] = array( + '_Common' => array( // leading _ indicates private + 'CommonAttributes', 'Text', 'Hypertext', 'List', + 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', + 'StyleAttribute' + ), + // HTML definitions, defer to XHTML definitions + 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), + 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')), + // XHTML definitions + 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ), + 'XHTML 1.0 Strict' => array(array('_Common')), + 'XHTML 1.1' => array(array('_Common')), + ); + + // Modules that specify elements that are unsafe from untrusted + // third-parties. These should be registered in $validModules but + // almost never $activeModules unless you really know what you're + // doing. + $this->collections['Unsafe'] = array(); + + // Modules to import if lenient mode (attempt to convert everything + // to a valid representation) is on. 
These must not be in $validModules + // unless specified so. + $this->collections['Lenient'] = array( + 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')), + 'XHTML 1.0 Strict' => array('TransformToStrict'), + 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11') + ); + + // Modules to import if correctional mode (correct everything that + // is feasible to strict mode) is on. These must not be in $validModules + // unless specified so. + $this->collections['Correctional'] = array( + 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), + 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one + ); + + // User-space modules, custom code or whatever + $this->collections['Extension'] = array(); + + // setup active versus valid modules. ORDER IS IMPORTANT! + // definition modules + $this->makeCollectionActive('Safe'); + $this->makeCollectionValid('Unsafe'); + // redefinition modules + $this->makeCollectionActive('Lenient'); + $this->makeCollectionActive('Correctional'); + + $this->autoDoctype = '*'; + $this->autoCollection = 'Extension'; + + } + + /** + * Adds a module to the recognized module list. This does not + * do anything else: the module must be added to a corresponding + * collection to be "activated". + * @param $module Mixed: string module name, with or without + * HTMLPurifier_HTMLModule prefix, or instance of + * subclass of HTMLPurifier_HTMLModule. + */ + function addModule($module) { + if (is_string($module)) { + $original_module = $module; + if (!class_exists($module)) { + foreach ($this->prefixes as $prefix) { + $module = $prefix . $original_module; + if (class_exists($module)) break; + } + } + if (!class_exists($module)) { + trigger_error($original_module . 
' module does not exist', + E_USER_ERROR); + return; + } + $module = new $module(); + } + $module->order = $this->counter++; // assign then increment + $this->modules[$module->name] = $module; + if ($this->autoDoctype !== false && $this->autoCollection !== false) { + $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name; + } + } + + /** + * Makes a collection active, while also making it valid if not + * already done so. See $activeModules for the semantics of "active". + * @param $collection_name Name of collection to activate + */ + function makeCollectionActive($collection_name) { + if (!in_array($collection_name, $this->validCollections)) { + $this->makeCollectionValid($collection_name); + } + $this->activeCollections[] = $collection_name; + } + + /** + * Makes a collection valid. See $validModules for the semantics of "valid" + */ + function makeCollectionValid($collection_name) { + $this->validCollections[] = $collection_name; + } + + /** + * Adds a class prefix that addModule() will use to resolve a + * string name to a concrete class + */ + function addPrefix($prefix) { + $this->prefixes[] = (string) $prefix; + } + + function setup($config) { + + // load up the autocollection + if ($this->autoCollection !== false) { + $this->makeCollectionActive($this->autoCollection); + } + + // retrieve the doctype + $this->doctype = $this->getDoctype($config); + if (isset($this->doctypeAliases[$this->doctype])) { + $this->doctype = $this->doctypeAliases[$this->doctype]; + } + + // process module collections to module name => module instance form + foreach ($this->collections as $col_i => $x) { + $this->processCollections($this->collections[$col_i]); + } + + $this->validModules = $this->assembleModules($this->validCollections); + $this->activeModules = $this->assembleModules($this->activeCollections); + + // setup lookup table based on all valid modules + foreach ($this->validModules as $module) { + foreach ($module->info as $name => $def) { + if 
(!isset($this->elementLookup[$name])) { + $this->elementLookup[$name] = array(); + } + $this->elementLookup[$name][] = $module->name; + } + } + + // note the different choice + $this->contentSets = new HTMLPurifier_ContentSets( + // content models that contain non-allowed elements are + // harmless because RemoveForeignElements will ensure + // they never get in anyway, and there is usually no + // reason why you should want to restrict a content + // model beyond what is mandated by the doctype. + // Note, however, that this means redefinitions of + // content models can't be tossed in validModels willy-nilly: + // that stuff still is regulated by configuration. + $this->validModules + ); + $this->attrCollections = new HTMLPurifier_AttrCollections( + $this->attrTypes, + // only explicitly allowed modules are allowed to affect + // the global attribute collections. This mean's there's + // a distinction between loading the Bdo module, and the + // bdo element: Bdo will enable the dir attribute on all + // elements, while bdo will only define the bdo element, + // which will not have an editable directionality. This might + // catch people who are loading only elements by surprise, so + // we should consider loading an entire module if all the + // elements it defines are requested by the user, especially + // if it affects the global attribute collections. + $this->activeModules + ); + + } + + /** + * Takes a list of collections and merges together all the defined + * modules for the current doctype from those collections. 
+ * @param $collections List of collection suffixes we should grab + * modules from (like 'Safe' or 'Lenient') + */ + function assembleModules($collections) { + $modules = array(); + $numOfCollectionsUsed = 0; + foreach ($collections as $name) { + $disable_global = false; + if (!isset($this->collections[$name])) { + trigger_error("$name collection is undefined", E_USER_ERROR); + continue; + } + $cols = $this->collections[$name]; + if (isset($cols[$this->doctype])) { + if (isset($cols[$this->doctype]['*'])) { + unset($cols[$this->doctype]['*']); + $disable_global = true; + } + $modules += $cols[$this->doctype]; + $numOfCollectionsUsed++; + } + // accept catch-all doctype + if ( + $this->doctype !== '*' && + isset($cols['*']) && + !$disable_global + ) { + $modules += $cols['*']; + } + } + + if ($numOfCollectionsUsed < 1) { + // possible XSS injection if user-specified doctypes + // are allowed + trigger_error("Doctype {$this->doctype} does not exist, ". + "check for typos (if you desire a doctype that allows ". + "no elements, use an empty array collection)", E_USER_ERROR); + } + return $modules; + } + + /** + * Takes a collection and performs inclusions and substitutions for it. 
+ * @param $cols Reference to collections class member variable + */ + function processCollections(&$cols) { + + // $cols is the set of collections + // $col_i is the name (index) of a collection + // $col is a collection/list of modules + + // perform inclusions + foreach ($cols as $col_i => $col) { + $seen = array(); + if (!empty($col[0]) && is_array($col[0])) { + $seen[$col_i] = true; // recursion reporting + $includes = $col[0]; + unset($cols[$col_i][0]); // remove inclusions value, recursion guard + } else { + $includes = array(); + } + if (empty($includes)) continue; + for ($i = 0; isset($includes[$i]); $i++) { + $inc = $includes[$i]; + if (isset($seen[$inc])) { + trigger_error( + "Circular inclusion detected in $col_i collection", + E_USER_ERROR + ); + continue; + } else { + $seen[$inc] = true; + } + if (!isset($cols[$inc])) { + trigger_error( + "Collection $col_i tried to include undefined ". + "collection $inc", E_USER_ERROR); + continue; + } + foreach ($cols[$inc] as $module) { + if (is_array($module)) { // another inclusion! + foreach ($module as $inc2) $includes[] = $inc2; + continue; + } + $cols[$col_i][] = $module; // merge in the other modules + } + } + } + + // replace with real modules, invert module from list to + // assoc array of module name to module instance + foreach ($cols as $col_i => $col) { + $ignore_global = false; + $order = array(); + foreach ($col as $module_i => $module) { + unset($cols[$col_i][$module_i]); + if (is_array($module)) { + trigger_error("Illegal inclusion array at index". + " $module_i found collection $col_i, inclusion". + " arrays must be at start of collection (index 0)", + E_USER_ERROR); + continue; + } + if ($module_i === '*' && $module === false) { + $ignore_global = true; + continue; + } + if (!isset($this->modules[$module])) { + trigger_error( + "Collection $col_i references undefined ". 
+ "module $module", + E_USER_ERROR + ); + continue; + } + $module = $this->modules[$module]; + $cols[$col_i][$module->name] = $module; + $order[$module->name] = $module->order; + } + array_multisort( + $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i] + ); + if ($ignore_global) $cols[$col_i]['*'] = false; + } + + // delete pseudo-collections + foreach ($cols as $col_i => $col) { + if ($col_i[0] == '_') unset($cols[$col_i]); + } + + } + + /** + * Retrieves the doctype from the configuration object + */ + function getDoctype($config) { + $doctype = $config->get('HTML', 'Doctype'); + if ($doctype !== null) { + return $doctype; + } + if (!$this->initialized) { + // don't do HTML-oriented backwards compatibility stuff + // use either the auto-doctype, or the catch-all doctype + return $this->autoDoctype ? $this->autoDoctype : '*'; + } + // this is backwards-compatibility stuff + if ($config->get('Core', 'XHTML')) { + $doctype = 'XHTML 1.0'; + } else { + $doctype = 'HTML 4.01'; + } + if ($config->get('HTML', 'Strict')) { + $doctype .= ' Strict'; + } else { + $doctype .= ' Transitional'; + } + return $doctype; + } + + /** + * Retrieves merged element definitions for all active elements. + * @note We may want to generate an elements array during setup + * and pass that on, because a specific combination of + * elements may trigger the loading of a module. + * @param $config Instance of HTMLPurifier_Config, for determining + * stray elements. + */ + function getElements($config) { + + $elements = array(); + foreach ($this->activeModules as $module) { + foreach ($module->elements as $name) { + $elements[$name] = $this->getElement($name, $config); + } + } + + // standalone elements now loaded + + return $elements; + + } + + /** + * Retrieves a single merged element definition + * @param $name Name of element + * @param $config Instance of HTMLPurifier_Config, may not be necessary. 
+ */ + function getElement($name, $config) { + + $def = false; + + $modules = $this->validModules; + + if (!isset($this->elementLookup[$name])) { + return false; + } + + foreach($this->elementLookup[$name] as $module_name) { + + $module = $modules[$module_name]; + $new_def = $module->info[$name]; + + if (!$def && $new_def->standalone) { + $def = $new_def; + } elseif ($def) { + $def->mergeIn($new_def); + } else { + // could "save it for another day": + // non-standalone definitions that don't have a standalone + // to merge into could be deferred to the end + continue; + } + + // attribute value expansions + $this->attrCollections->performInclusions($def->attr); + $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); + + // descendants_are_inline, for ChildDef_Chameleon + if (is_string($def->content_model) && + strpos($def->content_model, 'Inline') !== false) { + if ($name != 'del' && $name != 'ins') { + // this is for you, ins/del + $def->descendants_are_inline = true; + } + } + + $this->contentSets->generateChildDef($def, $module); + } + + return $def; + + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php new file mode 100644 index 0000000000..40ff2384bb --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php @@ -0,0 +1,42 @@ +ids[$id])) return false; + return $this->ids[$id] = true; + } + + /** + * Load a list of IDs into the lookup table + * @param $array_of_ids Array of IDs to load + * @note This function doesn't care about duplicates + */ + function load($array_of_ids) { + foreach ($array_of_ids as $id) { + $this->ids[$id] = true; + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language.php b/lib/htmlpurifier/HTMLPurifier/Language.php new file mode 100644 index 0000000000..ca6fe03138 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Language.php @@ -0,0 +1,56 @@ +_loaded) return; + 
$factory = HTMLPurifier_LanguageFactory::instance(); + $factory->loadLanguage($this->code); + foreach ($factory->keys as $key) { + $this->$key = $factory->cache[$this->code][$key]; + } + $this->_loaded = true; + } + + /** + * Retrieves a localised message. Does not perform any operations. + * @param $key string identifier of message + * @return string localised message + */ + function getMessage($key) { + if (!$this->_loaded) $this->load(); + if (!isset($this->messages[$key])) return ''; + return $this->messages[$key]; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php new file mode 100644 index 0000000000..303ba4bae0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php @@ -0,0 +1,12 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php new file mode 100644 index 0000000000..115662bda9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php @@ -0,0 +1,11 @@ + 'HTML Purifier X' +); + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php new file mode 100644 index 0000000000..7650b81803 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php @@ -0,0 +1,12 @@ + 'HTML Purifier', +'pizza' => 'Pizza', // for unit testing purposes + +); + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php new file mode 100644 index 0000000000..7097ced767 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php @@ -0,0 +1,196 @@ +cache[$language_code][$key] = $value + * @value array map + */ + var $cache; + + /** + * Valid keys in the HTMLPurifier_Language object. 
Designates which + * variables to slurp out of a message file. + * @value array list + */ + var $keys = array('fallback', 'messages'); + + /** + * Instance of HTMLPurifier_AttrDef_Lang to validate language codes + * @value object HTMLPurifier_AttrDef_Lang + */ + var $validator; + + /** + * Cached copy of dirname(__FILE__), directory of current file without + * trailing slash + * @value string filename + */ + var $dir; + + /** + * Keys whose contents are a hash map and can be merged + * @value array lookup + */ + var $mergeable_keys_map = array('messages' => true); + + /** + * Keys whose contents are a list and can be merged + * @value array lookup + */ + var $mergeable_keys_list = array();
+
+    /**
+     * Retrieve sole instance of the factory.
+     * @static
+     * @param $prototype Optional prototype to overload sole instance with,
+     *                   or bool true to reset to default factory.
+     * @return Reference to the shared factory instance.
+     */
+    function &instance($prototype = null) {
+        static $instance = null;
+        if ($prototype !== null && $prototype !== true) {
+            // a concrete prototype was supplied: it becomes the sole instance
+            $instance = $prototype;
+        } elseif ($instance === null || $prototype === true) {
+            // first use, or an explicit reset request. Bool true must be
+            // handled here: treating it as a prototype would store the
+            // boolean itself as the "factory".
+            $instance = new HTMLPurifier_LanguageFactory();
+            $instance->setup();
+        }
+        return $instance;
+    }
+
+    /**
+     * Sets up the singleton, much like a constructor
+     * @note Prevents people from getting this outside of the singleton
+     */
+    function setup() {
+        $this->validator = new HTMLPurifier_AttrDef_Lang();
+        $this->dir = dirname(__FILE__);
+    }
+
+    /**
+     * Creates a language object, handles class fallbacks
+     * @param $code string language code; a code rejected by the validator
+     *              is replaced with 'en'
+     * @return Language object whose ->code is the (validated) requested
+     *         code, even when the object's class comes from a fallback
+     *         language.
+     */
+    function create($code) {
+
+        $config = $context = false; // hope it doesn't use these!
+        $code = $this->validator->validate($code, $config, $context);
+        if ($code === false) $code = 'en'; // malformed code becomes English
+
+        $pcode = str_replace('-', '_', $code); // make valid PHP classname
+        static $depth = 0; // recursion protection
+
+        if ($code == 'en') {
+            $class = 'HTMLPurifier_Language';
+            $file = $this->dir . '/Language.php';
+        } else {
+            $class = 'HTMLPurifier_Language_' . $pcode;
+            $file = $this->dir . '/Language/classes/' . $code . '.php';
+            // PHP5/APC deps bug workaround can go here
+            // you can bypass the conditional include by loading the
+            // file yourself
+            if (file_exists($file) && !class_exists($class)) {
+                include_once $file;
+            }
+        }
+
+        // 'en' is the end of every fallback chain, so it is never sent
+        // down the fallback path: that keeps the recursion below finite.
+        if ($code != 'en' && !class_exists($class)) {
+            // go fallback: no dedicated class for this language, so build
+            // the object from its fallback language instead. Both lookups
+            // are methods of this factory.
+            $fallback = $this->getFallbackFor($code);
+            if (!$fallback) $fallback = 'en'; // message file disabled fallback
+            $depth++;
+            $lang = $this->create($fallback);
+            $depth--;
+        } else {
+            $lang = new $class;
+        }
+        // keep the requested code so the messages loaded later are the
+        // ones for the language that was asked for
+        $lang->code = $code;
+
+        return $lang;
+
+    }
+
+    /**
+     * Returns the fallback language for language
+     * @note Loads the original language into cache
+     * @param $code string language code
+     * @return The 'fallback' entry cached for $code
+     */
+    function getFallbackFor($code) {
+        $this->loadLanguage($code);
+        return $this->cache[$code]['fallback'];
+    }
+
+    /**
+     * Loads language into the cache, handles message file and fallbacks
+     * @param $code string language code
+     */
+    function loadLanguage($code) {
+        // recursion guard: codes whose load is currently in progress. It is
+        // static (shared by all factory instances), so entries are removed
+        // again once a load has finished.
+        static $languages_seen = array();
+
+        // abort if we've already loaded it
+        if (isset($this->cache[$code])) return;
+
+        // generate filename
+        $filename = $this->dir . '/Language/messages/' . $code . '.php';
+
+        // default fallback : may be overwritten by the ensuing include
+        $fallback = ($code != 'en') ? 'en' : false;
+
+        // load primary localisation
+        if (!file_exists($filename)) {
+            // skip the include: will rely solely on fallback
+            $filename = $this->dir . '/Language/messages/en.php';
+            $cache = array();
+        } else {
+            // the message file assigns the variables named in $this->keys
+            include $filename;
+            $cache = compact($this->keys);
+        }
+
+        // load fallback localisation
+        if (!empty($fallback)) {
+
+            // infinite recursion guard
+            if (isset($languages_seen[$code])) {
+                trigger_error('Circular fallback reference in language ' .
+                    $code, E_USER_ERROR);
+                $fallback = 'en';
+            }
+            $languages_seen[$code] = true;
+
+            // load the fallback recursively
+            $this->loadLanguage($fallback);
+            $fallback_cache = $this->cache[$fallback];
+
+            // merge fallback with current language
+            foreach ( $this->keys as $key ) {
+                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
+                    if (isset($this->mergeable_keys_map[$key])) {
+                        // hash maps: our own entries win over the fallback's
+                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
+                    } elseif (isset($this->mergeable_keys_list[$key])) {
+                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
+                    }
+                } else {
+                    $cache[$key] = $fallback_cache[$key];
+                }
+            }
+
+            // this load is done; only in-progress loads may trip the guard
+            unset($languages_seen[$code]);
+
+        }
+
+        // save to cache for later retrieval
+        $this->cache[$code] = $cache;
+
+        return;
+    }
+
+}
+
+?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer.php b/lib/htmlpurifier/HTMLPurifier/Lexer.php new file mode 100644 index 0000000000..e7242e1e36 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php @@ -0,0 +1,237 @@ +_entity_parser = new HTMLPurifier_EntityParser(); + } + + + /** + * Most common entity to raw value conversion table for special entities. + * @protected + */ + var $_special_entity2str = + array( + '"' => '"', + '&' => '&', + '<' => '<', + '>' => '>', + ''' => "'", + ''' => "'", + ''' => "'" + ); + + /** + * Parses special entities into the proper characters. + * + * This string will translate escaped versions of the special characters + * into the correct ones. + * + * @warning + * You should be able to treat the output of this function as + * completely parsed, but that's only because all other entities should + * have been handled previously in substituteNonSpecialEntities() + * + * @param $string String character data to be parsed. + * @returns Parsed character data. 
+ */ + function parseData($string) { + + // following functions require at least one character + if ($string === '') return ''; + + // subtracts amps that cannot possibly be escaped + $num_amp = substr_count($string, '&') - substr_count($string, '& ') - + ($string[strlen($string)-1] === '&' ? 1 : 0); + + if (!$num_amp) return $string; // abort if no entities + $num_esc_amp = substr_count($string, '&'); + $string = strtr($string, $this->_special_entity2str); + + // code duplication for sake of optimization, see above + $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - + ($string[strlen($string)-1] === '&' ? 1 : 0); + + if ($num_amp_2 <= $num_esc_amp) return $string; + + // hmm... now we have some uncommon entities. Use the callback. + $string = $this->_entity_parser->substituteSpecialEntities($string); + return $string; + } + + /** + * Lexes an HTML string into tokens. + * + * @param $string String HTML. + * @return HTMLPurifier_Token array representation of HTML. + */ + function tokenizeHTML($string, $config, &$context) { + trigger_error('Call to abstract class', E_USER_ERROR); + } + + /** + * Retrieves or sets the default Lexer as a Prototype Factory. + * + * Depending on what PHP version you are running, the abstract base + * Lexer class will determine which concrete Lexer is best for you: + * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex + * for PHP 5 and beyond. + * + * Passing the optional prototype lexer parameter will override the + * default with your own implementation. A copy/reference of the prototype + * lexer will now be returned when you request a new lexer. + * + * @static + * + * @note + * Though it is possible to call this factory method from subclasses, + * such usage is not recommended. + * + * @param $prototype Optional prototype lexer. + * @return Concrete lexer. 
+ */ + function create($prototype = null) { + // we don't really care if it's a reference or a copy + static $lexer = null; + if ($prototype) { + $lexer = $prototype; + } + if (empty($lexer)) { + if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5 + class_exists('DOMDocument')) { // check for DOM support + require_once 'HTMLPurifier/Lexer/DOMLex.php'; + $lexer = new HTMLPurifier_Lexer_DOMLex(); + } else { + require_once 'HTMLPurifier/Lexer/DirectLex.php'; + $lexer = new HTMLPurifier_Lexer_DirectLex(); + } + } + return $lexer; + } + + /** + * Translates CDATA sections into regular sections (through escaping). + * + * @static + * @protected + * @param $string HTML string to process. + * @returns HTML with CDATA sections escaped. + */ + function escapeCDATA($string) { + return preg_replace_callback( + '//', + array('HTMLPurifier_Lexer', 'CDATACallback'), + $string + ); + } + + /** + * Callback function for escapeCDATA() that does the work. + * + * @static + * @warning Though this is public in order to let the callback happen, + * calling it directly is not recommended. + * @params $matches PCRE matches array, with index 0 the entire match + * and 1 the inside of the CDATA section. + * @returns Escaped internals of the CDATA section. + */ + function CDATACallback($matches) { + // not exactly sure why the character set is needed, but whatever + return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8'); + } + + /** + * Takes a piece of HTML and normalizes it by converting entities, fixing + * encoding, extracting bits, and other good stuff. 
+ */ + function normalize($html, $config, &$context) { + + // extract body from document if applicable + if ($config->get('Core', 'AcceptFullDocuments')) { + $html = $this->extractBody($html); + } + + // escape CDATA + $html = $this->escapeCDATA($html); + + // expand entities that aren't the big five + $html = $this->_entity_parser->substituteNonSpecialEntities($html); + + // clean into wellformed UTF-8 string for an SGML context: this has + // to be done after entity expansion because the entities sometimes + // represent non-SGML characters (horror, horror!) + $html = HTMLPurifier_Encoder::cleanUTF8($html); + + return $html; + } + + /** + * Takes a string of HTML (fragment or document) and returns the content + */ + function extractBody($html) { + $matches = array(); + $result = preg_match('!]*>(.+?)!is', $html, $matches); + if ($result) { + return $matches[1]; + } else { + return $html; + } + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php new file mode 100644 index 0000000000..9286b023d0 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php @@ -0,0 +1,152 @@ +factory = new HTMLPurifier_TokenFactory(); + } + + public function tokenizeHTML($string, $config, &$context) { + + $string = $this->normalize($string, $config, $context); + + // preprocess string, essential for UTF-8 + $string = + ''. + ''. + ''. + '

'.$string.'

'; + + $doc = new DOMDocument(); + $doc->encoding = 'UTF-8'; // technically does nothing, but whatever + + // DOM will toss errors if the HTML its parsing has really big + // problems, so we're going to mute them. This can cause problems + // if a custom error handler that doesn't implement error_reporting + // is set, as noted by a Drupal plugin of HTML Purifier. Consider + // making our own error reporter to temporarily load in + @$doc->loadHTML($string); + + $tokens = array(); + $this->tokenizeDOM( + $doc->getElementsByTagName('html')->item(0)-> // html + getElementsByTagName('body')->item(0)-> // body + getElementsByTagName('div')->item(0) // div + , $tokens); + return $tokens; + } + + /** + * Recursive function that tokenizes a node, putting it into an accumulator. + * + * @param $node DOMNode to be tokenized. + * @param $tokens Array-list of already tokenized tokens. + * @param $collect Says whether or start and close are collected, set to + * false at first recursion because it's the implicit DIV + * tag you're dealing with. + * @returns Tokens of node appended to previously passed tokens. + */ + protected function tokenizeDOM($node, &$tokens, $collect = false) { + // recursive goodness! + + // intercept non element nodes. WE MUST catch all of them, + // but we're not getting the character reference nodes because + // those should have been preprocessed + if ($node->nodeType === XML_TEXT_NODE || + $node->nodeType === XML_CDATA_SECTION_NODE) { + $tokens[] = $this->factory->createText($node->data); + return; + } elseif ($node->nodeType === XML_COMMENT_NODE) { + $tokens[] = $this->factory->createComment($node->data); + return; + } elseif ( + // not-well tested: there may be other nodes we have to grab + $node->nodeType !== XML_ELEMENT_NODE + ) { + return; + } + + $attr = $node->hasAttributes() ? 
+ $this->transformAttrToAssoc($node->attributes) : + array(); + + // We still have to make sure that the element actually IS empty + if (!$node->childNodes->length) { + if ($collect) { + $tokens[] = $this->factory->createEmpty($node->tagName, $attr); + } + } else { + if ($collect) { // don't wrap on first iteration + $tokens[] = $this->factory->createStart( + $tag_name = $node->tagName, // somehow, it get's dropped + $attr + ); + } + foreach ($node->childNodes as $node) { + // remember, it's an accumulator. Otherwise, we'd have + // to use array_merge + $this->tokenizeDOM($node, $tokens, true); + } + if ($collect) { + $tokens[] = $this->factory->createEnd($tag_name); + } + } + + } + + /** + * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array. + * + * @param $attribute_list DOMNamedNodeMap of DOMAttr objects. + * @returns Associative array of attributes. + */ + protected function transformAttrToAssoc($node_map) { + // NamedNodeMap is documented very well, so we're using undocumented + // features, namely, the fact that it implements Iterator and + // has a ->length attribute + if ($node_map->length === 0) return array(); + $array = array(); + foreach ($node_map as $attr) { + $array[$attr->name] = $attr->value; + } + return $array; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php new file mode 100644 index 0000000000..65d95a7cf9 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php @@ -0,0 +1,309 @@ +normalize($html, $config, $context); + + $cursor = 0; // our location in the text + $inside_tag = false; // whether or not we're parsing the inside of a tag + $array = array(); // result array + + // infinite loop protection + // has to be pretty big, since html docs can be big + // we're allow two hundred thousand tags... more than enough? 
+ $loops = 0; + + while(true) { + + // infinite loop protection + if (++$loops > 200000) return array(); + + $position_next_lt = strpos($html, '<', $cursor); + $position_next_gt = strpos($html, '>', $cursor); + + // triggers on "asdf" but not "asdf " + if ($position_next_lt === $cursor) { + $inside_tag = true; + $cursor++; + } + + if (!$inside_tag && $position_next_lt !== false) { + // We are not inside tag and there still is another tag to parse + $array[] = new + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, $cursor, $position_next_lt - $cursor + ) + ) + ); + $cursor = $position_next_lt + 1; + $inside_tag = true; + continue; + } elseif (!$inside_tag) { + // We are not inside tag but there are no more tags + // If we're already at the end, break + if ($cursor === strlen($html)) break; + // Create Text of rest of string + $array[] = new + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, $cursor + ) + ) + ); + break; + } elseif ($inside_tag && $position_next_gt !== false) { + // We are in tag and it is well formed + // Grab the internals of the tag + $strlen_segment = $position_next_gt - $cursor; + $segment = substr($html, $cursor, $strlen_segment); + + // Check if it's a comment + if ( + substr($segment, 0, 3) == '!--' && + substr($segment, $strlen_segment-2, 2) == '--' + ) { + $array[] = new + HTMLPurifier_Token_Comment( + substr( + $segment, 3, $strlen_segment - 5 + ) + ); + $inside_tag = false; + $cursor = $position_next_gt + 1; + continue; + } + + // Check if it's an end tag + $is_end_tag = (strpos($segment,'/') === 0); + if ($is_end_tag) { + $type = substr($segment, 1); + $array[] = new HTMLPurifier_Token_End($type); + $inside_tag = false; + $cursor = $position_next_gt + 1; + continue; + } + + // Check if it is explicitly self closing, if so, remove + // trailing slash. Remember, we could have a tag like
, so + // any later token processing scripts must convert improperly + // classified EmptyTags from StartTags. + $is_self_closing= (strpos($segment,'/') === $strlen_segment-1); + if ($is_self_closing) { + $strlen_segment--; + $segment = substr($segment, 0, $strlen_segment); + } + + // Check if there are any attributes + $position_first_space = strcspn($segment, $this->_whitespace); + + if ($position_first_space >= $strlen_segment) { + if ($is_self_closing) { + $array[] = new HTMLPurifier_Token_Empty($segment); + } else { + $array[] = new HTMLPurifier_Token_Start($segment); + } + $inside_tag = false; + $cursor = $position_next_gt + 1; + continue; + } + + // Grab out all the data + $type = substr($segment, 0, $position_first_space); + $attribute_string = + trim( + substr( + $segment, $position_first_space + ) + ); + if ($attribute_string) { + $attr = $this->parseAttributeString( + $attribute_string + , $config, $context + ); + } else { + $attr = array(); + } + + if ($is_self_closing) { + $array[] = new HTMLPurifier_Token_Empty($type, $attr); + } else { + $array[] = new HTMLPurifier_Token_Start($type, $attr); + } + $cursor = $position_next_gt + 1; + $inside_tag = false; + continue; + } else { + $array[] = new + HTMLPurifier_Token_Text( + '<' . + $this->parseData( + substr($html, $cursor) + ) + ); + break; + } + break; + } + return $array; + } + + /** + * Takes the inside of an HTML tag and makes an assoc array of attributes. + * + * @param $string Inside of tag excluding name. + * @returns Assoc array of attributes. 
+ */ + function parseAttributeString($string, $config, &$context) { + $string = (string) $string; // quick typecast + + if ($string == '') return array(); // no attributes + + // let's see if we can abort as quickly as possible + // one equal sign, no spaces => one attribute + $num_equal = substr_count($string, '='); + $has_space = strpos($string, ' '); + if ($num_equal === 0 && !$has_space) { + // bool attribute + return array($string => $string); + } elseif ($num_equal === 1 && !$has_space) { + // only one attribute + list($key, $quoted_value) = explode('=', $string); + $quoted_value = trim($quoted_value); + if (!$key) return array(); + if (!$quoted_value) return array($key => ''); + $first_char = @$quoted_value[0]; + $last_char = @$quoted_value[strlen($quoted_value)-1]; + + $same_quote = ($first_char == $last_char); + $open_quote = ($first_char == '"' || $first_char == "'"); + + if ( $same_quote && $open_quote) { + // well behaved + $value = substr($quoted_value, 1, strlen($quoted_value) - 2); + } else { + // not well behaved + if ($open_quote) { + $value = substr($quoted_value, 1); + } else { + $value = $quoted_value; + } + } + return array($key => $value); + } + + // setup loop environment + $array = array(); // return assoc array of attributes + $cursor = 0; // current position in string (moves forward) + $size = strlen($string); // size of the string (stays the same) + + // if we have unquoted attributes, the parser expects a terminating + // space, so let's guarantee that there's always a terminating space. 
+ $string .= ' '; + + // infinite loop protection + $loops = 0; + + while(true) { + + // infinite loop protection + if (++$loops > 1000) return array(); + + if ($cursor >= $size) { + break; + } + + $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); + + // grab the key + + $key_begin = $cursor; //we're currently at the start of the key + + // scroll past all characters that are the key (not whitespace or =) + $cursor += strcspn($string, $this->_whitespace . '=', $cursor); + + $key_end = $cursor; // now at the end of the key + + $key = substr($string, $key_begin, $key_end - $key_begin); + + if (!$key) continue; // empty key + + // scroll past all whitespace + $cursor += strspn($string, $this->_whitespace, $cursor); + + if ($cursor >= $size) { + $array[$key] = $key; + break; + } + + // if the next character is an equal sign, we've got a regular + // pair, otherwise, it's a bool attribute + $first_char = @$string[$cursor]; + + if ($first_char == '=') { + // key="value" + + $cursor++; + $cursor += strspn($string, $this->_whitespace, $cursor); + + // we might be in front of a quote right now + + $char = @$string[$cursor]; + + if ($char == '"' || $char == "'") { + // it's quoted, end bound is $char + $cursor++; + $value_begin = $cursor; + $cursor = strpos($string, $char, $cursor); + $value_end = $cursor; + } else { + // it's not quoted, end bound is whitespace + $value_begin = $cursor; + $cursor += strcspn($string, $this->_whitespace, $cursor); + $value_end = $cursor; + } + + $value = substr($string, $value_begin, $value_end - $value_begin); + $array[$key] = $this->parseData($value); + $cursor++; + + } else { + // boolattr + if ($key !== '') { + $array[$key] = $key; + } + + } + } + return $array; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php new file mode 100644 index 0000000000..18777ef7e8 --- /dev/null +++ 
b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php @@ -0,0 +1,110 @@ +tokens = array(); + + $string = $this->normalize($string, $config, $context); + + $parser = new XML_HTMLSax3(); + $parser->set_object($this); + $parser->set_element_handler('openHandler','closeHandler'); + $parser->set_data_handler('dataHandler'); + $parser->set_escape_handler('escapeHandler'); + + // doesn't seem to work correctly for attributes + $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1); + + $parser->parse($string); + + return $this->tokens; + + } + + /** + * Open tag event handler, interface is defined by PEAR package. + */ + function openHandler(&$parser, $name, $attrs, $closed) { + // entities are not resolved in attrs + foreach ($attrs as $key => $attr) { + $attrs[$key] = $this->parseData($attr); + } + if ($closed) { + $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); + } else { + $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); + } + return true; + } + + /** + * Close tag event handler, interface is defined by PEAR package. + */ + function closeHandler(&$parser, $name) { + // HTMLSax3 seems to always send empty tags an extra close tag + // check and ignore if you see it: + // [TESTME] to make sure it doesn't overreach + if ($this->tokens[count($this->tokens)-1]->type == 'empty') { + return true; + } + $this->tokens[] = new HTMLPurifier_Token_End($name); + return true; + } + + /** + * Data event handler, interface is defined by PEAR package. + */ + function dataHandler(&$parser, $data) { + $this->tokens[] = new HTMLPurifier_Token_Text($data); + return true; + } + + /** + * Escaped text handler, interface is defined by PEAR package. 
+ */ + function escapeHandler(&$parser, $data) { + if (strpos($data, '--') === 0) { + $this->tokens[] = new HTMLPurifier_Token_Comment($data); + } + // CDATA is handled elsewhere, but if it was handled here: + //if (strpos($data, '[CDATA[') === 0) { + // $this->tokens[] = new HTMLPurifier_Token_Text( + // substr($data, 7, strlen($data) - 9) ); + //} + return true; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php b/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php new file mode 100644 index 0000000000..7a12caaa76 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php @@ -0,0 +1,47 @@ += 48 && $int <= 57) || // digits + ($int >= 65 && $int <= 90) || // uppercase letters + ($int >= 97 && $int <= 122) || // lowercase letters + $int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._ + ) { + $ret .= chr($int) . $text; + continue; + } + $encoding = strtoupper($encoding); + $ret .= '%' . $encoding . $text; + } + return $ret; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer.php b/lib/htmlpurifier/HTMLPurifier/Printer.php new file mode 100644 index 0000000000..14135fd8db --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Printer.php @@ -0,0 +1,149 @@ +generator = new HTMLPurifier_Generator(); + } + + /** + * Main function that renders object or aspect of that object + * @param $config Configuration object + */ + function render($config) {} + + /** + * Returns a start tag + * @param $tag Tag name + * @param $attr Attribute array + */ + function start($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Start($tag, $attr ? 
$attr : array()) + ); + } + + /** + * Returns an end teg + * @param $tag Tag name + */ + function end($tag) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_End($tag) + ); + } + + /** + * Prints a complete element with content inside + * @param $tag Tag name + * @param $contents Element contents + * @param $attr Tag attributes + * @param $escape Bool whether or not to escape contents + */ + function element($tag, $contents, $attr = array(), $escape = true) { + return $this->start($tag, $attr) . + ($escape ? $this->escape($contents) : $contents) . + $this->end($tag); + } + + /** + * Prints a simple key/value row in a table. + * @param $name Key + * @param $value Value + */ + function row($name, $value) { + if (is_bool($value)) $value = $value ? 'On' : 'Off'; + return + $this->start('tr') . "\n" . + $this->element('th', $name) . "\n" . + $this->element('td', $value) . "\n" . + $this->end('tr') + ; + } + + /** + * Escapes a string for HTML output. + * @param $string String to escape + */ + function escape($string) { + $string = HTMLPurifier_Encoder::cleanUTF8($string); + $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + return $string; + } + + /** + * Takes a list of strings and turns them into a single list + * @param $array List of strings + * @param $polite Bool whether or not to add an end before the last + */ + function listify($array, $polite = false) { + if (empty($array)) return 'None'; + $ret = ''; + $i = count($array); + foreach ($array as $value) { + $i--; + $ret .= $value; + if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; + if ($polite && $i == 1) $ret .= 'and '; + } + return $ret; + } + + /** + * Retrieves the class of an object without prefixes, as well as metadata + * @param $obj Object to determine class of + * @param $prefix Further prefix to remove + */ + function getClass($obj, $sec_prefix = '') { + static $five = null; + if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); + $prefix = 
'HTMLPurifier_' . $sec_prefix; + if (!$five) $prefix = strtolower($prefix); + $class = str_replace($prefix, '', get_class($obj)); + $lclass = strtolower($class); + $class .= '('; + switch ($lclass) { + case 'enum': + $values = array(); + foreach ($obj->valid_values as $value => $bool) { + $values[] = $value; + } + $class .= implode(', ', $values); + break; + case 'composite': + $values = array(); + foreach ($obj->defs as $def) { + $values[] = $this->getClass($def, $sec_prefix); + } + $class .= implode(', ', $values); + break; + case 'multiple': + $class .= $this->getClass($obj->single, $sec_prefix) . ', '; + $class .= $obj->max; + break; + } + $class .= ')'; + return $class; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php b/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php new file mode 100644 index 0000000000..7745f5f444 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Printer/CSSDefinition.php @@ -0,0 +1,40 @@ +def = $config->getCSSDefinition(); + $ret = ''; + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + $ret .= $this->start('table'); + + $ret .= $this->element('caption', 'Properties ($info)'); + + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Property', array('class' => 'heavy')); + $ret .= $this->element('th', 'Definition', array('class' => 'heavy', 'style' => 'width:auto;')); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + + ksort($this->def->info); + foreach ($this->def->info as $property => $obj) { + $name = $this->getClass($obj, 'AttrDef_'); + $ret .= $this->row($property, $name); + } + + $ret .= $this->end('table'); + $ret .= $this->end('div'); + + return $ret; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php b/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php new file mode 100644 index 0000000000..a677c58bf6 --- /dev/null +++ 
b/lib/htmlpurifier/HTMLPurifier/Printer/HTMLDefinition.php @@ -0,0 +1,210 @@ +config =& $config; + + $this->def = $config->getHTMLDefinition(); + $def =& $this->def; + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Environment'); + + $ret .= $this->row('Parent of fragment', $def->info_parent); + $ret .= $this->renderChildren($def->info_parent_def->child); + $ret .= $this->row('Block wrap name', $def->info_block_wrapper); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Global attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Tag transforms'); + $list = array(); + foreach ($def->info_tag_transform as $old => $new) { + $new = $this->getClass($new, 'TagTransform_'); + $list[] = "<$old> with $new"; + } + $ret .= $this->element('td', $this->listify($list)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); + $ret .= $this->end('tr'); + + $ret .= $this->end('table'); + + + $ret .= $this->renderInfo(); + + + $ret .= $this->end('div'); + + return $ret; + } + + /** + * Renders the Elements ($info) table + */ + function renderInfo() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Elements ($info)'); + ksort($this->def->info); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy')); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', 
$this->listifyTagLookup($this->def->info), array('colspan' => 2)); + $ret .= $this->end('tr'); + foreach ($this->def->info as $name => $def) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Inline content'); + $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No'); + $ret .= $this->end('tr'); + if (!empty($def->excludes)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Excludes'); + $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_pre)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_post)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); + $ret .= $this->end('tr'); + } + if (!empty($def->auto_close)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Auto closed by'); + $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); + $ret .= $this->end('tr'); + } + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed attributes'); + $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->renderChildren($def->child); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders a row describing the allowed children of an element + * @param $def HTMLPurifier_ChildDef of pertinent element + */ + function renderChildren($def) { + $context = new HTMLPurifier_Context(); + $ret = ''; + $ret .= $this->start('tr'); + $elements = array(); + $attr = array(); + if (isset($def->elements)) { + if 
($def->type == 'strictblockquote') { + $def->validateChildren(array(), $this->config, $context); + } + $elements = $def->elements; + } elseif ($def->type == 'chameleon') { + $attr['rowspan'] = 2; + } elseif ($def->type == 'empty') { + $elements = array(); + } elseif ($def->type == 'table') { + $elements = array_flip(array('col', 'caption', 'colgroup', 'thead', + 'tfoot', 'tbody', 'tr')); + } + $ret .= $this->element('th', 'Allowed children', $attr); + + if ($def->type == 'chameleon') { + + $ret .= $this->element('td', + 'Block: ' . + $this->escape($this->listifyTagLookup($def->block->elements)),0,0); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', + 'Inline: ' . + $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + + } else { + $ret .= $this->element('td', + ''.ucfirst($def->type).': ' . + $this->escape($this->listifyTagLookup($elements)),0,0); + } + $ret .= $this->end('tr'); + return $ret; + } + + /** + * Listifies a tag lookup table. 
+ * @param $array Tag lookup array in form of array('tagname' => true) + */ + function listifyTagLookup($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $discard) { + if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue; + $list[] = $name; + } + return $this->listify($list); + } + + /** + * Listifies a list of objects by retrieving class names and internal state + * @param $array List of objects + * @todo Also add information about internal state + */ + function listifyObjectList($array) { + ksort($array); + $list = array(); + foreach ($array as $discard => $obj) { + $list[] = $this->getClass($obj, 'AttrTransform_'); + } + return $this->listify($list); + } + + /** + * Listifies a hash of attributes to AttrDef classes + * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef) + */ + function listifyAttr($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $obj) { + if ($obj === false) continue; + $list[] = "$name = " . $this->getClass($obj, 'AttrDef_') . 
''; + } + return $this->listify($list); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy.php b/lib/htmlpurifier/HTMLPurifier/Strategy.php new file mode 100644 index 0000000000..746b0a2d6e --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy.php @@ -0,0 +1,33 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php b/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php new file mode 100644 index 0000000000..bd86874798 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/Composite.php @@ -0,0 +1,30 @@ +strategies as $strategy) { + $tokens = $strategy->execute($tokens, $config, $context); + } + return $tokens; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php b/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php new file mode 100644 index 0000000000..66e7bb3634 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/Core.php @@ -0,0 +1,25 @@ +strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements(); + $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed(); + $this->strategies[] = new HTMLPurifier_Strategy_FixNesting(); + $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes(); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php new file mode 100644 index 0000000000..08f907562f --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/FixNesting.php @@ -0,0 +1,292 @@ +getHTMLDefinition(); + + // insert implicit "parent" node, will be removed at end. + // ! 
we might want to move this to configuration + // DEFINITION CALL + $parent_name = $definition->info_parent; + array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); + $tokens[] = new HTMLPurifier_Token_End($parent_name); + + // setup the context variables + $is_inline = false; // reference var that we alter + $context->register('IsInline', $is_inline); + + //####################################################################// + // Loop initialization + + // stack that contains the indexes of all parents, + // $stack[count($stack)-1] being the current parent + $stack = array(); + + // stack that contains all elements that are excluded + // same structure as $stack, but it is only populated when an element + // with exclusions is processed, i.e. there won't be empty exclusions. + $exclude_stack = array(); + + //####################################################################// + // Loop + + // iterate through all start nodes. Determining the start node + // is complicated so it has been omitted from the loop construct + for ($i = 0, $size = count($tokens) ; $i < $size; ) { + + //################################################################// + // Gather information on children + + // child token accumulator + $child_tokens = array(); + + // scroll to the end of this node, report number, and collect + // all children + for ($j = $i, $depth = 0; ; $j++) { + if ($tokens[$j]->type == 'start') { + $depth++; + // skip token assignment on first iteration, this is the + // token we currently are on + if ($depth == 1) continue; + } elseif ($tokens[$j]->type == 'end') { + $depth--; + // skip token assignment on last iteration, this is the + // end token of the token we're currently on + if ($depth == 0) break; + } + $child_tokens[] = $tokens[$j]; + } + + // $i is index of start token + // $j is index of end token + + //################################################################// + // Gather information on parent + + // calculate parent information + 
if ($count = count($stack)) { + $parent_index = $stack[$count-1]; + $parent_name = $tokens[$parent_index]->name; + if ($parent_index == 0) { + $parent_def = $definition->info_parent_def; + } else { + $parent_def = $definition->info[$parent_name]; + } + } else { + // unknown info, it won't be used anyway + $parent_index = $parent_name = $parent_def = null; + } + + // calculate context + if ($is_inline === false) { + // check if conditions make it inline + if (!empty($parent_def) && $parent_def->descendants_are_inline) { + $is_inline = $count - 1; + } + } else { + // check if we're out of inline + if ($count === $is_inline) { + $is_inline = false; + } + } + + //################################################################// + // Determine whether element is explicitly excluded SGML-style + + // determine whether or not element is excluded by checking all + // parent exclusions. The array should not be very large, two + // elements at most. + $excluded = false; + if (!empty($exclude_stack)) { + foreach ($exclude_stack as $lookup) { + if (isset($lookup[$tokens[$i]->name])) { + $excluded = true; + // no need to continue processing + break; + } + } + } + + //################################################################// + // Perform child validation + + if ($excluded) { + // there is an exclusion, remove the entire node + $result = false; + $excludes = array(); // not used, but good to initialize anyway + } else { + // DEFINITION CALL + if ($i === 0) { + // special processing for the first node + $def = $definition->info_parent_def; + } else { + $def = $definition->info[$tokens[$i]->name]; + + } + + if (!empty($def->child)) { + // have DTD child def validate children + $result = $def->child->validateChildren( + $child_tokens, $config, $context); + } else { + // weird, no child definition, get rid of everything + $result = false; + } + + // determine whether or not this element has any exclusions + $excludes = $def->excludes; + } + + // $result is now a bool or 
array + + //################################################################// + // Process result by interpreting $result + + if ($result === true) { + // leave the node as is + + // register start token as a parental node start + $stack[] = $i; + + // register exclusions if there are any + if (!empty($excludes)) $exclude_stack[] = $excludes; + + // move cursor to next possible start node + $i++; + + } elseif($result === false) { + // remove entire node + + // calculate length of inner tokens and current tokens + $length = $j - $i + 1; + + // perform removal + array_splice($tokens, $i, $length); + + // update size + $size -= $length; + + // there is no start token to register, + // current node is now the next possible start node + // unless it turns out that we need to do a double-check + + if (!$parent_def->child->allow_empty) { + // we need to do a double-check + $i = $parent_index; + array_pop($stack); + } + + // PROJECTED OPTIMIZATION: Process all children elements before + // reprocessing parent node. + + } else { + // replace node with $result + + // calculate length of inner tokens + $length = $j - $i - 1; + + // perform replacement + array_splice($tokens, $i + 1, $length, $result); + + // update size + $size -= $length; + $size += count($result); + + // register start token as a parental node start + $stack[] = $i; + + // register exclusions if there are any + if (!empty($excludes)) $exclude_stack[] = $excludes; + + // move cursor to next possible start node + $i++; + + } + + //################################################################// + // Scroll to next start node + + // We assume, at this point, that $i is the index of the token + // that is the first possible new start point for a node. + + // Test if the token indeed is a start tag, if not, move forward + // and test again. 
+ $size = count($tokens); + while ($i < $size and $tokens[$i]->type != 'start') { + if ($tokens[$i]->type == 'end') { + // pop a token index off the stack if we ended a node + array_pop($stack); + // pop an exclusion lookup off exclusion stack if + // we ended node and that node had exclusions + if ($i == 0 || $i == $size - 1) { + // use specialized var if it's the super-parent + $s_excludes = $definition->info_parent_def->excludes; + } else { + $s_excludes = $definition->info[$tokens[$i]->name]->excludes; + } + if ($s_excludes) { + array_pop($exclude_stack); + } + } + $i++; + } + + } + + //####################################################################// + // Post-processing + + // remove implicit parent tokens at the beginning and end + array_shift($tokens); + array_pop($tokens); + + // remove context variables + $context->destroy('IsInline'); + + //####################################################################// + // Return + + return $tokens; + + } + +} + +?> diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php new file mode 100644 index 0000000000..84580d3d34 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php @@ -0,0 +1,158 @@ +getHTMLDefinition(); + $generator = new HTMLPurifier_Generator(); + $result = array(); + $current_nesting = array(); + $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + foreach ($tokens as $token) { + if (empty( $token->is_tag )) { + $result[] = $token; + continue; + } + + // DEFINITION CALL + $info = $definition->info[$token->name]->child; + + // test if it claims to be a start tag but is empty + if ($info->type == 'empty' && + $token->type == 'start' ) { + + $result[] = new HTMLPurifier_Token_Empty($token->name, + $token->attr); + continue; + } + + // test if it claims to be empty but really is a start tag + if ($info->type != 'empty' && + $token->type == 'empty' ) { + + $result[] = new 
HTMLPurifier_Token_Start($token->name, + $token->attr); + $result[] = new HTMLPurifier_Token_End($token->name); + + continue; + } + + // automatically insert empty tags + if ($token->type == 'empty') { + $result[] = $token; + continue; + } + + // we give start tags precedence, so automatically accept unless... + // it's one of those special cases + if ($token->type == 'start') { + + // if there's a parent, check for special case + if (!empty($current_nesting)) { + + $parent = array_pop($current_nesting); + $parent_name = $parent->name; + $parent_info = $definition->info[$parent_name]; + + if (isset($parent_info->auto_close[$token->name])) { + $result[] = new HTMLPurifier_Token_End($parent_name); + $result[] = $token; + $current_nesting[] = $token; + continue; + } + + $current_nesting[] = $parent; // undo the pop + } + + $result[] = $token; + $current_nesting[] = $token; + continue; + } + + // sanity check + if ($token->type != 'end') continue; + + // okay, we're dealing with a closing tag + + // make sure that we have something open + if (empty($current_nesting)) { + if ($escape_invalid_tags) { + $result[] = new HTMLPurifier_Token_Text( + $generator->generateFromToken($token, $config, $context) + ); + } + continue; + } + + // first, check for the simplest case: everything closes neatly + + // current_nesting is modified + $current_parent = array_pop($current_nesting); + if ($current_parent->name == $token->name) { + $result[] = $token; + continue; + } + + // undo the array_pop + $current_nesting[] = $current_parent; + + // okay, so we're trying to close the wrong tag + + // scroll back the entire nest, trying to find our tag + // feature could be to specify how far you'd like to go + $size = count($current_nesting); + // -2 because -1 is the last element, but we already checked that + $skipped_tags = false; + for ($i = $size - 2; $i >= 0; $i--) { + if ($current_nesting[$i]->name == $token->name) { + // current nesting is modified + $skipped_tags = 
array_splice($current_nesting, $i); + break; + } + } + + // we still didn't find the tag, so translate to text + if ($skipped_tags === false) { + if ($escape_invalid_tags) { + $result[] = new HTMLPurifier_Token_Text( + $generator->generateFromToken($token, $config, $context) + ); + } + continue; + } + + // okay, we found it, close all the skipped tags + // note that skipped tags contains the element we need closed + $size = count($skipped_tags); + for ($i = $size - 1; $i >= 0; $i--) { + $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + } + + // done! + + } + + // we're at the end now, fix all still unclosed tags + + if (!empty($current_nesting)) { + $size = count($current_nesting); + for ($i = $size - 1; $i >= 0; $i--) { + $result[] = + new HTMLPurifier_Token_End($current_nesting[$i]->name); + } + } + + return $result; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php new file mode 100644 index 0000000000..27caf3645f --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -0,0 +1,86 @@ +img '. + 'tags, as the attribute validation strategy is not authorized to '. + 'remove elements from the document. This directive has been available '. + 'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.' +); + +/** + * Removes all unrecognized tags from the list of tokens. + * + * This strategy iterates through all the tokens and removes unrecognized + * tokens. If a token is not recognized but a TagTransform is defined for + * that element, the element will be transformed accordingly. 
+ */ + +class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy +{ + + function execute($tokens, $config, &$context) { + $definition = $config->getHTMLDefinition(); + $generator = new HTMLPurifier_Generator(); + $result = array(); + $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + foreach($tokens as $token) { + if (!empty( $token->is_tag )) { + // DEFINITION CALL + if (isset($definition->info[$token->name])) { + // leave untouched, except for a few special cases: + + // hard-coded image special case, pre-emptively drop + // if not available. Probably not abstract-able + if ( $token->name == 'img' ) { + if (!isset($token->attr['src'])) { + continue; + } + if (!isset($definition->info['img']->attr['src'])) { + continue; + } + $token->attr['src'] = + $definition-> + info['img']-> + attr['src']-> + validate($token->attr['src'], + $config, $context); + if ($token->attr['src'] === false) continue; + } + + } elseif ( + isset($definition->info_tag_transform[$token->name]) + ) { + // there is a transformation for this tag + // DEFINITION CALL + $token = $definition-> + info_tag_transform[$token->name]-> + transform($token, $config, $context); + } elseif ($escape_invalid_tags) { + // invalid tag, generate HTML and insert in + $token = new HTMLPurifier_Token_Text( + $generator->generateFromToken($token, $config, $context) + ); + } else { + continue; + } + } elseif ($token->type == 'comment') { + // strip comments + continue; + } elseif ($token->type == 'text') { + } else { + continue; + } + $result[] = $token; + } + return $result; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php new file mode 100644 index 0000000000..07744f803d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/ValidateAttributes.php @@ -0,0 +1,129 @@ +getHTMLDefinition(); + + // setup id_accumulator context + $id_accumulator = 
new HTMLPurifier_IDAccumulator(); + $id_accumulator->load($config->get('Attr', 'IDBlacklist')); + $context->register('IDAccumulator', $id_accumulator); + + // create alias to global definition array, see also $defs + // DEFINITION CALL + $d_defs = $definition->info_global_attr; + + foreach ($tokens as $key => $token) { + + // only process tokens that have attributes, + // namely start and empty tags + if ($token->type !== 'start' && $token->type !== 'empty') continue; + + // copy out attributes for easy manipulation + $attr = $token->attr; + + // do global transformations (pre) + // nothing currently utilizes this + foreach ($definition->info_attr_transform_pre as $transform) { + $attr = $transform->transform($attr, $config, $context); + } + + // do local transformations only applicable to this element (pre) + // ex.

+ foreach ($definition->info[$token->name]->attr_transform_pre + as $transform + ) { + $attr = $transform->transform($attr, $config, $context); + } + + // create alias to this element's attribute definition array, see + // also $d_defs (global attribute definition array) + // DEFINITION CALL + $defs = $definition->info[$token->name]->attr; + + // iterate through all the attribute keypairs + // Watch out for name collisions: $key has previously been used + foreach ($attr as $attr_key => $value) { + + // call the definition + if ( isset($defs[$attr_key]) ) { + // there is a local definition defined + if ($defs[$attr_key] === false) { + // We've explicitly been told not to allow this element. + // This is usually when there's a global definition + // that must be overridden. + // Theoretically speaking, we could have a + // AttrDef_DenyAll, but this is faster! + $result = false; + } else { + // validate according to the element's definition + $result = $defs[$attr_key]->validate( + $value, $config, $context + ); + } + } elseif ( isset($d_defs[$attr_key]) ) { + // there is a global definition defined, validate according + // to the global definition + $result = $d_defs[$attr_key]->validate( + $value, $config, $context + ); + } else { + // system never heard of the attribute? DELETE! + $result = false; + } + + // put the results into effect + if ($result === false || $result === null) { + // remove the attribute + unset($attr[$attr_key]); + } elseif (is_string($result)) { + // simple substitution + $attr[$attr_key] = $result; + } + + // we'd also want slightly more complicated substitution + // involving an array as the return value, + // although we're not sure how colliding attributes would + // resolve (certain ones would be completely overriden, + // others would prepend themselves). + } + + // post transforms + + // ex. to + foreach ($definition->info_attr_transform_post as $transform) { + $attr = $transform->transform($attr, $config, $context); + } + + // ex. 
to + foreach ($definition->info[$token->name]->attr_transform_post as $transform) { + $attr = $transform->transform($attr, $config, $context); + } + + // commit changes + // could interfere with flyweight implementation + $tokens[$key]->attr = $attr; + } + $context->destroy('IDAccumulator'); + + return $tokens; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform.php b/lib/htmlpurifier/HTMLPurifier/TagTransform.php new file mode 100644 index 0000000000..f5dc5c97b6 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform.php @@ -0,0 +1,29 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php new file mode 100644 index 0000000000..571bb9df4d --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Center.php @@ -0,0 +1,34 @@ +type == 'end') { + $new_tag = new HTMLPurifier_Token_End($this->transform_to); + return $new_tag; + } + $attr = $tag->attr; + $prepend_css = 'text-align:center;'; + if (isset($attr['style'])) { + $attr['style'] = $prepend_css . 
$attr['style']; + } else { + $attr['style'] = $prepend_css; + } + $new_tag = $tag->copy(); + $new_tag->name = $this->transform_to; + $new_tag->attr = $attr; + return $new_tag; + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php new file mode 100644 index 0000000000..ae6d783809 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Font.php @@ -0,0 +1,83 @@ + 'xx-small', + '2' => 'small', + '3' => 'medium', + '4' => 'large', + '5' => 'x-large', + '6' => 'xx-large', + '7' => '300%', + '-1' => 'smaller', + '+1' => 'larger', + '-2' => '60%', + '+2' => '150%', + '+4' => '300%' + ); + + function transform($tag, $config, &$context) { + + if ($tag->type == 'end') { + $new_tag = new HTMLPurifier_Token_End($this->transform_to); + return $new_tag; + } + + $attr = $tag->attr; + $prepend_style = ''; + + // handle color transform + if (isset($attr['color'])) { + $prepend_style .= 'color:' . $attr['color'] . ';'; + unset($attr['color']); + } + + // handle face transform + if (isset($attr['face'])) { + $prepend_style .= 'font-family:' . $attr['face'] . ';'; + unset($attr['face']); + } + + // handle size transform + if (isset($attr['size'])) { + if (isset($this->_size_lookup[$attr['size']])) { + $prepend_style .= 'font-size:' . + $this->_size_lookup[$attr['size']] . ';'; + } + unset($attr['size']); + } + + if ($prepend_style) { + $attr['style'] = isset($attr['style']) ? + $prepend_style . 
$attr['style'] : + $prepend_style; + } + + $new_tag = $tag->copy(); + $new_tag->name = $this->transform_to; + $new_tag->attr = $attr; + + return $new_tag; + + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php b/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php new file mode 100644 index 0000000000..6ffd0eabbb --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/TagTransform/Simple.php @@ -0,0 +1,26 @@ +transform_to = $transform_to; + } + + function transform($tag, $config, &$context) { + $new_tag = $tag->copy(); + $new_tag->name = $this->transform_to; + return $new_tag; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/Token.php b/lib/htmlpurifier/HTMLPurifier/Token.php new file mode 100644 index 0000000000..555e76f1b2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Token.php @@ -0,0 +1,168 @@ +is_a(). @public */ + + /** + * Copies the tag into a new one (clone substitute). + * @return Copied token + */ + function copy() { + trigger_error('Cannot copy abstract class', E_USER_ERROR); + } +} + +/** + * Abstract class of a tag token (start, end or empty), and its behavior. + */ +class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract +{ + /** + * Static bool marker that indicates the class is a tag. + * + * This allows us to check objects with !empty($obj->is_tag) + * without having to use a function call is_a(). + * + * @public + */ + var $is_tag = true; + + /** + * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. + * + * @note Strictly speaking, XML tags are case sensitive, so we shouldn't + * be lower-casing them, but these tokens cater to HTML tags, which are + * insensitive. + * + * @public + */ + var $name; + + /** + * Associative array of the tag's attributes. + */ + var $attr = array(); + + /** + * Non-overloaded constructor, which lower-cases passed tag name. + * + * @param $name String name. 
+ * @param $attr Associative array of attributes. + */ + function HTMLPurifier_Token_Tag($name, $attr = array()) { + $this->name = ctype_lower($name) ? $name : strtolower($name); + foreach ($attr as $key => $value) { + // normalization only necessary when key is not lowercase + if (!ctype_lower($key)) { + $new_key = strtolower($key); + if (!isset($attr[$new_key])) { + $attr[$new_key] = $attr[$key]; + } + if ($new_key !== $key) { + unset($attr[$key]); + } + } + } + $this->attr = $attr; + } +} + +/** + * Concrete start token class. + */ +class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag +{ + var $type = 'start'; + function copy() { + return new HTMLPurifier_Token_Start($this->name, $this->attr); + } +} + +/** + * Concrete empty token class. + */ +class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag +{ + var $type = 'empty'; + function copy() { + return new HTMLPurifier_Token_Empty($this->name, $this->attr); + } +} + +/** + * Concrete end token class. + * + * @warning This class accepts attributes even though end tags cannot. This + * is for optimization reasons, as under normal circumstances, the Lexers + * do not pass attributes. + */ +class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag +{ + var $type = 'end'; + function copy() { + return new HTMLPurifier_Token_End($this->name); + } +} + +/** + * Concrete text token class. + * + * Text tokens comprise of regular parsed character data (PCDATA) and raw + * character data (from the CDATA sections). Internally, their + * data is parsed with all entities expanded. Surprisingly, the text token + * does have a "tag name" called #PCDATA, which is how the DTD represents it + * in permissible child nodes. + */ +class HTMLPurifier_Token_Text extends HTMLPurifier_Token +{ + + var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */ + var $type = 'text'; + var $data; /**< Parsed character data of text. @public */ + var $is_whitespace; /**< Bool indicating if node is whitespace. 
@public */ + + /** + * Constructor, accepts data and determines if it is whitespace. + * + * @param $data String parsed character data. + */ + function HTMLPurifier_Token_Text($data) { + $this->data = $data; + $this->is_whitespace = ctype_space($data); + } + function copy() { + return new HTMLPurifier_Token_Text($this->data); + } + +} + +/** + * Concrete comment token class. Generally will be ignored. + */ +class HTMLPurifier_Token_Comment extends HTMLPurifier_Token +{ + var $data; /**< Character data within comment. @public */ + var $type = 'comment'; + /** + * Transparent constructor. + * + * @param $data String comment data. + */ + function HTMLPurifier_Token_Comment($data) { + $this->data = $data; + } + function copy() { + return new HTMLPurifier_Token_Comment($this->data); + } +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/TokenFactory.php b/lib/htmlpurifier/HTMLPurifier/TokenFactory.php new file mode 100644 index 0000000000..25cc4122a2 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/TokenFactory.php @@ -0,0 +1,96 @@ +p_start = new HTMLPurifier_Token_Start('', array()); + $this->p_end = new HTMLPurifier_Token_End(''); + $this->p_empty = new HTMLPurifier_Token_Empty('', array()); + $this->p_text = new HTMLPurifier_Token_Text(''); + $this->p_comment= new HTMLPurifier_Token_Comment(''); + } + + /** + * Creates a HTMLPurifier_Token_Start. + * @param $name Tag name + * @param $attr Associative array of attributes + * @return Generated HTMLPurifier_Token_Start + */ + public function createStart($name, $attr = array()) { + $p = clone $this->p_start; + $p->HTMLPurifier_Token_Tag($name, $attr); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_End. + * @param $name Tag name + * @return Generated HTMLPurifier_Token_End + */ + public function createEnd($name) { + $p = clone $this->p_end; + $p->HTMLPurifier_Token_Tag($name); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Empty. 
+ * @param $name Tag name + * @param $attr Associative array of attributes + * @return Generated HTMLPurifier_Token_Empty + */ + public function createEmpty($name, $attr = array()) { + $p = clone $this->p_empty; + $p->HTMLPurifier_Token_Tag($name, $attr); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Text. + * @param $data Data of text token + * @return Generated HTMLPurifier_Token_Text + */ + public function createText($data) { + $p = clone $this->p_text; + $p->HTMLPurifier_Token_Text($data); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Comment. + * @param $data Data of comment token + * @return Generated HTMLPurifier_Token_Comment + */ + public function createComment($data) { + $p = clone $this->p_comment; + $p->HTMLPurifier_Token_Comment($data); + return $p; + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/HTMLPurifier/URIScheme.php new file mode 100644 index 0000000000..20a9781b48 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme.php @@ -0,0 +1,44 @@ +default_port == $port) $port = null; + return array($userinfo, $host, $port, $path, $query); + } + +} + +?> \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php new file mode 100644 index 0000000000..dab9c981c4 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/ftp.php @@ -0,0 +1,45 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php new file mode 100644 index 0000000000..54b250da52 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/http.php @@ -0,0 +1,24 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php new file mode 100644 index 0000000000..7f896592de --- /dev/null +++ 
b/lib/htmlpurifier/HTMLPurifier/URIScheme/https.php @@ -0,0 +1,14 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php new file mode 100644 index 0000000000..2292072eea --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/mailto.php @@ -0,0 +1,30 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php new file mode 100644 index 0000000000..c9d1c2b0c7 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/news.php @@ -0,0 +1,24 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php new file mode 100644 index 0000000000..49fca4c3bb --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/nntp.php @@ -0,0 +1,24 @@ + \ No newline at end of file diff --git a/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php b/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php new file mode 100644 index 0000000000..d840068a3f --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URISchemeRegistry.php @@ -0,0 +1,104 @@ + true, // "Hypertext Transfer Protocol", nuf' said + 'https' => true, // HTTP over SSL (Secure Socket Layer) + // quite useful, but not necessary + 'mailto' => true,// Email + 'ftp' => true, // "File Transfer Protocol" + 'irc' => true, // "Internet Relay Chat", usually needs another app + // for Usenet, these two are similar, but distinct + 'nntp' => true, // individual Netnews articles + 'news' => true // newsgroup or individual Netnews articles + ), 'lookup', + 'Whitelist that defines the schemes that a URI is allowed to have. This '. + 'prevents XSS attacks from using pseudo-schemes like javascript or mocha.' +); + +HTMLPurifier_ConfigSchema::define( + 'URI', 'OverrideAllowedSchemes', true, 'bool', + 'If this is set to true (which it is by default), you can override '. 
+ '%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '. + 'to the registry. If false, you will also have to update that directive '. + 'in order to add more schemes.' +); + +/** + * Registry for retrieving specific URI scheme validator objects. + */ +class HTMLPurifier_URISchemeRegistry +{ + + /** + * Retrieve sole instance of the registry. + * @static + * @param $prototype Optional prototype to overload sole instance with, + * or bool true to reset to default registry. + * @note Pass a registry object $prototype with a compatible interface and + * the function will copy it and return it all further times. + */ + function &instance($prototype = null) { + static $instance = null; + if ($prototype !== null) { + $instance = $prototype; + } elseif ($instance === null || $prototype == true) { + $instance = new HTMLPurifier_URISchemeRegistry(); + } + return $instance; + } + + /** + * Cache of retrieved schemes. + * @protected + */ + var $schemes = array(); + + /** + * Directory where scheme objects can be found + * @private + */ + var $_scheme_dir = null; + + /** + * Retrieves a scheme validator object + * @param $scheme String scheme name like http or mailto + * @param $config HTMLPurifier_Config object + * @param $config HTMLPurifier_Context object + */ + function &getScheme($scheme, $config, &$context) { + if (!$config) $config = HTMLPurifier_Config::createDefault(); + $null = null; // for the sake of passing by reference + + // important, otherwise attacker could include arbitrary file + $allowed_schemes = $config->get('URI', 'AllowedSchemes'); + if (!$config->get('URI', 'OverrideAllowedSchemes') && + !isset($allowed_schemes[$scheme]) + ) { + return $null; + } + + if (isset($this->schemes[$scheme])) return $this->schemes[$scheme]; + if (empty($this->_dir)) $this->_dir = dirname(__FILE__) . '/URIScheme/'; + + if (!isset($allowed_schemes[$scheme])) return $null; + + @include_once $this->_dir . $scheme . '.php'; + $class = 'HTMLPurifier_URIScheme_' . 
$scheme; + if (!class_exists($class)) return $null; + $this->schemes[$scheme] = new $class(); + return $this->schemes[$scheme]; + } + + /** + * Registers a custom scheme to the cache. + * @param $scheme Scheme name + * @param $scheme_obj HTMLPurifier_URIScheme object + */ + function register($scheme, &$scheme_obj) { + $this->schemes[$scheme] =& $scheme_obj; + } + +} + +?> diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt new file mode 100644 index 0000000000..53cf1070b1 --- /dev/null +++ b/lib/htmlpurifier/readme_moodle.txt @@ -0,0 +1,8 @@ +Description of HTML Purifier v1.6.0 library import into Moodle + +Changes: + * Text.php - added nolink, tex and algebra tags + +skodak + +$Id$ diff --git a/lib/weblib.php b/lib/weblib.php index 986b01c41c..5f34a0805f 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -1658,7 +1658,9 @@ function trusttext_prepare_edit(&$text, &$format, $usehtmleditor, $context) { */ function clean_text($text, $format=FORMAT_MOODLE) { - global $ALLOWED_TAGS; + if (empty($text) or is_numeric($text)) { + return (string)$text; + } switch ($format) { case FORMAT_PLAIN: @@ -1667,17 +1669,21 @@ function clean_text($text, $format=FORMAT_MOODLE) { default: - /// Fix non standard entity notations - $text = preg_replace('/(&#[0-9]+)(;?)/', "\\1;", $text); - $text = preg_replace('/(&#x[0-9a-fA-F]+)(;?)/', "\\1;", $text); - - /// Remove tags that are not allowed - $text = strip_tags($text, $ALLOWED_TAGS); - - /// Clean up embedded scripts and , using kses - $text = cleanAttributes($text); + if (!empty($CFG->enablehtmlpurifier)) { + $text = purify_html($text); + } else { + /// Fix non standard entity notations + $text = preg_replace('/(&#[0-9]+)(;?)/', "\\1;", $text); + $text = preg_replace('/(&#x[0-9a-fA-F]+)(;?)/', "\\1;", $text); + + /// Remove tags that are not allowed + $text = strip_tags($text, $ALLOWED_TAGS); + + /// Clean up embedded scripts and , using kses + $text = cleanAttributes($text); + } - /// Remove script 
events + /// Remove potential script events - some extra protection for undiscovered bugs in our code $text = eregi_replace("([^a-z])language([[:space:]]*)=", "\\1Xlanguage=", $text); $text = eregi_replace("([^a-z])on([a-z]+)([[:space:]]*)=", "\\1Xon\\2=", $text); @@ -1685,6 +1691,24 @@ function clean_text($text, $format=FORMAT_MOODLE) { } } +/** + * KSES replacement cleaning function - uses HTML Purifier. + */ +function purify_html($text) { + global $CFG; + + static $purifier = false; + if (!$purifier) { + require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.auto.php'; + $config = HTMLPurifier_Config::createDefault(); + $config->set('Core', 'AcceptFullDocuments', false); + //$config->set('HTML', 'Strict', true); + $config->set('URI', 'AllowedSchemes', array('http'=>1, 'https'=>1, 'ftp'=>1, 'irc'=>1, 'nntp'=>1, 'news'=>1, 'rtsp'=>1, 'teamspeak'=>1, 'gopher'=>1, 'mms'=>1)); + $purifier = new HTMLPurifier($config); + } + return $purifier->purify($text); +} + /** * This function takes a string and examines it for HTML tags. * If tags are detected it passes the string to a helper function {@link cleanAttributes2()} @@ -5032,13 +5056,9 @@ function redirect($url, $message='', $delay=-1, $adminroot = '') { $message = clean_text($message); - $url = html_entity_decode($url); - $url = str_replace(array("\n", "\r"), '', $url); // some more cleaning - $encodedurl = htmlentities($url); - $tmpstr = clean_text(''); //clean encoded URL - $encodedurl = substr($tmpstr, 9, strlen($tmpstr)-13); - $url = html_entity_decode($encodedurl); - $surl = addslashes($url); + $encodedurl = preg_replace("/\&(?![a-zA-Z0-9#]{1,8};)/", "&", $url); + $encodedurl = preg_replace('/^.*href="([^"]*)".*$/', "\\1", clean_text('')); + $url = str_replace('&', '&', $encodedurl); /// At developer debug level. Don't redirect if errors have been printed on screen. 
/// Currenly only works in PHP 5.2+ @@ -5081,7 +5101,7 @@ function redirect($url, $message='', $delay=-1, $adminroot = '') { @header('Location: '.$url); //another way for older browsers and already sent headers (eg trailing whitespace in config.php) echo ''; - echo ''; // To cope with Mozilla bug + echo ''; // To cope with Mozilla bug die; } @@ -5104,7 +5124,7 @@ function redirect($url, $message='', $delay=-1, $adminroot = '') { //'); + document.location.replace(''); } setTimeout("redirect()", ); //]]> -- 2.39.5