+++ /dev/null
-<?php
-
-/**
- * This is a stub include that automatically configures the include path.
- */
-
-set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
-require_once 'HTMLPurifier.php';
-
+++ /dev/null
-<?php
-
-/**
- * Function wrapper for HTML Purifier for quick use.
- * @note This function only includes the library when it is called. While
- * this is efficient for instances when you only use HTML Purifier
- * on a few of your pages, it murders bytecode caching. You still
- * need to add HTML Purifier to your path.
- * @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
- */
-
-function HTMLPurifier($html, $config = null) {
- static $purifier = false;
- if (!$purifier) {
- require_once 'HTMLPurifier.php';
- $purifier = new HTMLPurifier();
- }
- return $purifier->purify($html, $config);
-}
-
<?php
-/*!
- * @mainpage
+/*! @mainpage
*
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
- * class, so this massive amount of infrastructure is usually concealed.
- * If you plan on working with the internals, be sure to include
- * HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
+ * and HTMLPurifier_Config.
*/
/*
- HTML Purifier 2.1.4 - Standards Compliant HTML Filtering
- Copyright (C) 2006-2007 Edward Z. Yang
+ HTML Purifier 3.1.0 - Standards Compliant HTML Filtering
+ Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-// constants are slow, but we'll make one exception
-define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
-
-// every class has an undocumented dependency to these, must be included!
-require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
-require_once 'HTMLPurifier/Config.php';
-require_once 'HTMLPurifier/Context.php';
-
-require_once 'HTMLPurifier/Lexer.php';
-require_once 'HTMLPurifier/Generator.php';
-require_once 'HTMLPurifier/Strategy/Core.php';
-require_once 'HTMLPurifier/Encoder.php';
-
-require_once 'HTMLPurifier/ErrorCollector.php';
-require_once 'HTMLPurifier/LanguageFactory.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'CollectErrors', false, 'bool', '
-Whether or not to collect errors found while filtering the document. This
-is a useful way to give feedback to your users. <strong>Warning:</strong>
-Currently this feature is very patchy and experimental, with lots of
-possible error messages not yet implemented. It will not cause any problems,
-but it may not help your users either. This directive has been available
-since 2.0.0.
-');
-
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
- * are *not* merged.
+ * are *not* merged (this behavior may change in the future).
*
- * @todo We need an easier way to inject strategies, it'll probably end
- * up getting done through config though.
+ * @todo We need an easier way to inject strategies using the configuration
+ * object.
*/
class HTMLPurifier
{
- var $version = '2.1.4';
+ /** Version of HTML Purifier */
+ public $version = '3.1.0';
+
+ /** Constant with version of HTML Purifier */
+ const VERSION = '3.1.0';
+
+ /** Global configuration object */
+ public $config;
- var $config;
- var $filters = array();
+ /** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
+ private $filters = array();
- var $strategy, $generator;
+ /** Single instance of HTML Purifier */
+ private static $instance;
+
+ protected $strategy, $generator;
/**
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
- * @public
*/
- var $context;
+ public $context;
/**
* Initializes the purifier.
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
- function HTMLPurifier($config = null) {
+ public function __construct($config = null) {
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
- $this->generator = new HTMLPurifier_Generator();
}
* Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object
*/
- function addFilter($filter) {
+ public function addFilter($filter) {
+ trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
$this->filters[] = $filter;
}
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*/
- function purify($html, $config = null) {
+ public function purify($html, $config = null) {
+ // :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependant, partially
$context = new HTMLPurifier_Context();
- // our friendly neighborhood generator, all primed with configuration too!
- $this->generator->generateFromTokens(array(), $config, $context);
+ // setup HTML generator
+ $this->generator = new HTMLPurifier_Generator($config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
- for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
- $html = $this->filters[$i]->preFilter($html, $config, $context);
+ // setup filters
+ $filter_flags = $config->getBatch('Filter');
+ $custom_filters = $filter_flags['Custom'];
+ unset($filter_flags['Custom']);
+ $filters = array();
+ foreach ($filter_flags as $filter => $flag) {
+ if (!$flag) continue;
+ $class = "HTMLPurifier_Filter_$filter";
+ $filters[] = new $class;
+ }
+ foreach ($custom_filters as $filter) {
+ // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
+ $filters[] = $filter;
+ }
+ $filters = array_merge($filters, $this->filters);
+ // maybe prepare(), but later
+
+ for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
+ $html = $filters[$i]->preFilter($html, $config, $context);
}
// purified HTML
$html, $config, $context
),
$config, $context
- ),
- $config, $context
+ )
);
- for ($i = $size - 1; $i >= 0; $i--) {
- $html = $this->filters[$i]->postFilter($html, $config, $context);
+ for ($i = $filter_size - 1; $i >= 0; $i--) {
+ $html = $filters[$i]->postFilter($html, $config, $context);
}
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
* See HTMLPurifier::purify() for more details.
* @return Array of purified HTML
*/
- function purifyArray($array_of_html, $config = null) {
+ public function purifyArray($array_of_html, $config = null) {
$context_array = array();
foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config);
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
- * overload singleton with.
+ * overload singleton with, or HTMLPurifier_Config
+ * instance to configure the generated version with.
*/
- function &instance($prototype = null) {
- static $htmlpurifier;
- if (!$htmlpurifier || $prototype) {
- if (is_a($prototype, 'HTMLPurifier')) {
- $htmlpurifier = $prototype;
+ public static function instance($prototype = null) {
+ if (!self::$instance || $prototype) {
+ if ($prototype instanceof HTMLPurifier) {
+ self::$instance = $prototype;
} elseif ($prototype) {
- $htmlpurifier = new HTMLPurifier($prototype);
+ self::$instance = new HTMLPurifier($prototype);
} else {
- $htmlpurifier = new HTMLPurifier();
+ self::$instance = new HTMLPurifier();
}
}
- return $htmlpurifier;
+ return self::$instance;
}
- function &getInstance($prototype = null) {
+ /**
+ * @note Backwards compatibility, see instance()
+ */
+ public static function getInstance($prototype = null) {
return HTMLPurifier::instance($prototype);
}
}
-
--- /dev/null
+<?php\r
+\r
+/**\r
+ * @file\r
+ * This file was auto-generated by generate-includes.php and includes all of\r
+ * the core files required by HTML Purifier. This is a convenience stub that\r
+ * includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT\r
+ * EDIT THIS FILE, changes will be overwritten the next time the script is run.\r
+ * \r
+ * Changes to include_path are not necessary.\r
+ */\r
+\r
+$__dir = dirname(__FILE__);\r
+\r
+require_once $__dir . '/HTMLPurifier.php';\r
+require_once $__dir . '/HTMLPurifier/AttrCollections.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTypes.php';\r
+require_once $__dir . '/HTMLPurifier/AttrValidator.php';\r
+require_once $__dir . '/HTMLPurifier/Bootstrap.php';\r
+require_once $__dir . '/HTMLPurifier/Definition.php';\r
+require_once $__dir . '/HTMLPurifier/CSSDefinition.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef.php';\r
+require_once $__dir . '/HTMLPurifier/Config.php';\r
+require_once $__dir . '/HTMLPurifier/ConfigDef.php';\r
+require_once $__dir . '/HTMLPurifier/ConfigSchema.php';\r
+require_once $__dir . '/HTMLPurifier/ContentSets.php';\r
+require_once $__dir . '/HTMLPurifier/Context.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';\r
+require_once $__dir . '/HTMLPurifier/Doctype.php';\r
+require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';\r
+require_once $__dir . '/HTMLPurifier/ElementDef.php';\r
+require_once $__dir . '/HTMLPurifier/Encoder.php';\r
+require_once $__dir . '/HTMLPurifier/EntityLookup.php';\r
+require_once $__dir . '/HTMLPurifier/EntityParser.php';\r
+require_once $__dir . '/HTMLPurifier/ErrorCollector.php';\r
+require_once $__dir . '/HTMLPurifier/Exception.php';\r
+require_once $__dir . '/HTMLPurifier/Filter.php';\r
+require_once $__dir . '/HTMLPurifier/Generator.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLDefinition.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModuleManager.php';\r
+require_once $__dir . '/HTMLPurifier/IDAccumulator.php';\r
+require_once $__dir . '/HTMLPurifier/Injector.php';\r
+require_once $__dir . '/HTMLPurifier/Language.php';\r
+require_once $__dir . '/HTMLPurifier/LanguageFactory.php';\r
+require_once $__dir . '/HTMLPurifier/Lexer.php';\r
+require_once $__dir . '/HTMLPurifier/PercentEncoder.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy.php';\r
+require_once $__dir . '/HTMLPurifier/StringHash.php';\r
+require_once $__dir . '/HTMLPurifier/StringHashParser.php';\r
+require_once $__dir . '/HTMLPurifier/TagTransform.php';\r
+require_once $__dir . '/HTMLPurifier/Token.php';\r
+require_once $__dir . '/HTMLPurifier/TokenFactory.php';\r
+require_once $__dir . '/HTMLPurifier/URI.php';\r
+require_once $__dir . '/HTMLPurifier/URIDefinition.php';\r
+require_once $__dir . '/HTMLPurifier/URIFilter.php';\r
+require_once $__dir . '/HTMLPurifier/URIParser.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme.php';\r
+require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';\r
+require_once $__dir . '/HTMLPurifier/VarParser.php';\r
+require_once $__dir . '/HTMLPurifier/VarParserException.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/AlphaValue.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Background.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';\r
+require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';\r
+require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';\r
+require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';\r
+require_once $__dir . '/HTMLPurifier/ConfigDef/Directive.php';\r
+require_once $__dir . '/HTMLPurifier/ConfigDef/DirectiveAlias.php';\r
+require_once $__dir . '/HTMLPurifier/ConfigDef/Namespace.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';\r
+require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';\r
+require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';\r
+require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';\r
+require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';\r
+require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';\r
+require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';\r
+require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/Core.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/MakeWellFormed.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/RemoveForeignElements.php';\r
+require_once $__dir . '/HTMLPurifier/Strategy/ValidateAttributes.php';\r
+require_once $__dir . '/HTMLPurifier/TagTransform/Font.php';\r
+require_once $__dir . '/HTMLPurifier/TagTransform/Simple.php';\r
+require_once $__dir . '/HTMLPurifier/Token/Comment.php';\r
+require_once $__dir . '/HTMLPurifier/Token/Tag.php';\r
+require_once $__dir . '/HTMLPurifier/Token/Empty.php';\r
+require_once $__dir . '/HTMLPurifier/Token/End.php';\r
+require_once $__dir . '/HTMLPurifier/Token/Start.php';\r
+require_once $__dir . '/HTMLPurifier/Token/Text.php';\r
+require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';\r
+require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';\r
+require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';\r
+require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/http.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/https.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/news.php';\r
+require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';\r
+require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';\r
+require_once $__dir . '/HTMLPurifier/VarParser/Native.php';\r
<?php
-require_once 'HTMLPurifier/AttrTypes.php';
-
/**
* Defines common attribute collections that modules reference
*/
/**
* Associative array of attribute collections, indexed by name
*/
- var $info = array();
+ public $info = array();
/**
* Performs all expansions on internal data for use by other inclusions
* @param $attr_types HTMLPurifier_AttrTypes instance
* @param $modules Hash array of HTMLPurifier_HTMLModule members
*/
- function HTMLPurifier_AttrCollections($attr_types, $modules) {
+ public function __construct($attr_types, $modules) {
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
* all inclusions specified by the zero index.
* @param &$attr Reference to attribute array
*/
- function performInclusions(&$attr) {
+ public function performInclusions(&$attr) {
if (!isset($attr[0])) return;
$merge = $attr[0];
$seen = array(); // recursion guard
* @param &$attr Reference to attribute array
* @param $attr_types HTMLPurifier_AttrTypes instance
*/
- function expandIdentifiers(&$attr, $attr_types) {
+ public function expandIdentifiers(&$attr, $attr_types) {
// because foreach will process new elements we add, make sure we
// skip duplicates
* subclasses are also responsible for cleaning the code if possible.
*/
-class HTMLPurifier_AttrDef
+abstract class HTMLPurifier_AttrDef
{
/**
* Tells us whether or not an HTML attribute is minimized. Has no
* meaning in other contexts.
*/
- var $minimized = false;
+ public $minimized = false;
/**
* Tells us whether or not an HTML attribute is required. Has no
* meaning in other contexts
*/
- var $required = false;
+ public $required = false;
/**
* Validates and cleans passed string according to a definition.
*
- * @public
* @param $string String to be validated and cleaned.
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_AttrContext object.
*/
- function validate($string, $config, &$context) {
- trigger_error('Cannot call abstract function', E_USER_ERROR);
- }
+ abstract public function validate($string, $config, $context);
/**
* Convenience method that parses a string as if it were CDATA.
* function. Trim and whitespace collapsing are supposed to only
* occur in NMTOKENs. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct.
- *
- * @public
*/
- function parseCDATA($string) {
+ public function parseCDATA($string) {
$string = trim($string);
$string = str_replace("\n", '', $string);
$string = str_replace(array("\r", "\t"), ' ', $string);
* Factory method for creating this class from a string.
* @param $string String construction info
* @return Created AttrDef object corresponding to $string
- * @public
*/
- function make($string) {
- // default implementation, return flyweight of this object
- // if overloaded, it is *necessary* for you to clone the
- // object (usually by instantiating a new copy) and return that
+ public function make($string) {
+ // default implementation, return a flyweight of this object.
+ // If $string has an effect on the returned object (i.e. you
+ // need to overload this method), it is best
+ // to clone or instantiate new copies. (Instantiation is safer.)
return $this;
}
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
* properly. THIS IS A HACK!
*/
- function mungeRgb($string) {
+ protected function mungeRgb($string) {
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/CSSDefinition.php';
-
/**
* Validates the HTML attribute style, otherwise known as CSS.
* @note We don't implement the whole CSS specification, so it might be
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{
- function validate($css, $config, &$context) {
+ public function validate($css, $config, $context) {
$css = $this->parseCDATA($css);
--- /dev/null
+<?php
+
+/**
+ * Validates a CSS alpha value: defers to the parent number validator and
+ * then clamps the accepted number into the inclusive range 0 to 1.
+ */
+class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
+{
+
+    public function __construct() {
+        // opacity is non-negative, but out-of-range input is clamped in
+        // validate() rather than rejected here
+        parent::__construct(false);
+    }
+
+    /**
+     * @param $number String to be validated as an alpha value.
+     * @param $config Configuration object, forwarded to the parent validator.
+     * @param $context Context object, forwarded to the parent validator.
+     * @return false if the parent validator rejects the input, '0' or '1'
+     *         when the number falls outside the range, otherwise the
+     *         parent's result unchanged.
+     */
+    public function validate($number, $config, $context) {
+        $validated = parent::validate($number, $config, $context);
+        if ($validated === false) {
+            return false;
+        }
+        $numeric = (float) $validated;
+        if ($numeric > 1.0) {
+            return '1';
+        }
+        if ($numeric < 0.0) {
+            return '0';
+        }
+        return $validated;
+    }
+
+}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/CSSDefinition.php';
-
/**
* Validates shorthand CSS property background.
* @warning Does not support url tokens that have internal spaces.
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
*/
- var $info;
+ protected $info;
- function HTMLPurifier_AttrDef_CSS_Background($config) {
+ public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['background-color'] = $def->info['background-color'];
$this->info['background-image'] = $def->info['background-image'];
$this->info['background-position'] = $def->info['background-position'];
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
-
/* W3C says:
[ // adjective and number must be in correct order, even if
// you could switch them without introducing ambiguity.
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{
- var $length;
- var $percentage;
+ protected $length;
+ protected $percentage;
- function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
+ public function __construct() {
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$bits = explode(' ', $string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates the border property as defined by CSS.
*/
/**
* Local copy of properties this property is shorthand for.
*/
- var $info = array();
+ protected $info = array();
- function HTMLPurifier_AttrDef_CSS_Border($config) {
+ public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['border-width'] = $def->info['border-width'];
$this->info['border-style'] = $def->info['border-style'];
$this->info['border-top-color'] = $def->info['border-top-color'];
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$string = $this->mungeRgb($string);
$bits = explode(' ', $string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'ColorKeywords', array(
- 'maroon' => '#800000',
- 'red' => '#FF0000',
- 'orange' => '#FFA500',
- 'yellow' => '#FFFF00',
- 'olive' => '#808000',
- 'purple' => '#800080',
- 'fuchsia' => '#FF00FF',
- 'white' => '#FFFFFF',
- 'lime' => '#00FF00',
- 'green' => '#008000',
- 'navy' => '#000080',
- 'blue' => '#0000FF',
- 'aqua' => '#00FFFF',
- 'teal' => '#008080',
- 'black' => '#000000',
- 'silver' => '#C0C0C0',
- 'gray' => '#808080'
- ), 'hash', '
-Lookup array of color names to six digit hexadecimal number corresponding
-to color, with preceding hash mark. Used when parsing colors.
-This directive has been available since 2.0.0.
-');
-
/**
* Validates Color as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
- function validate($color, $config, &$context) {
+ public function validate($color, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
/**
* List of HTMLPurifier_AttrDef objects that may process strings
- * @protected
+ * @todo Make protected
*/
- var $defs;
+ public $defs;
/**
* @param $defs List of HTMLPurifier_AttrDef objects
*/
- function HTMLPurifier_AttrDef_CSS_Composite($defs) {
+ public function __construct($defs) {
$this->defs = $defs;
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
foreach ($this->defs as $i => $def) {
$result = $this->defs[$i]->validate($string, $config, $context);
if ($result !== false) return $result;
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
- var $def, $element;
+ protected $def, $element;
/**
* @param $def Definition to wrap
* @param $element Element to deny
*/
- function HTMLPurifier_AttrDef_CSS_DenyElementDecorator(&$def, $element) {
- $this->def =& $def;
+ public function __construct($def, $element) {
+ $this->def = $def;
$this->element = $element;
}
/**
* Checks if CurrentToken is set and equal to $this->element
*/
- function validate($string, $config, $context) {
+ public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if ($token && $token->name == $this->element) return false;
return $this->def->validate($string, $config, $context);
--- /dev/null
+<?php
+
+/**
+ * Microsoft's proprietary filter: CSS property
+ * @note Currently supports the alpha filter. In the future, this will
+ *       probably need an extensible framework
+ */
+class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
+{
+
+    /** Integer validator used to check the opacity parameter */
+    protected $intValidator;
+
+    public function __construct() {
+        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
+    }
+
+    /**
+     * Validates a filter declaration, keeping only the first valid
+     * opacity parameter and clamping it to the range 0 to 100.
+     * @param $value String filter declaration to validate.
+     * @param $config Configuration object, forwarded to the integer validator.
+     * @param $context Context object, forwarded to the integer validator.
+     * @return 'none' when that keyword is given, false when the function
+     *         name is not one of the recognized alpha spellings, otherwise
+     *         the rebuilt "function(parameters)" string.
+     */
+    public function validate($value, $config, $context) {
+        $value = $this->parseCDATA($value);
+        if ($value === 'none') return $value;
+        // if we looped this we could support multiple filters
+        $function_length = strcspn($value, '(');
+        $function = trim(substr($value, 0, $function_length));
+        if ($function !== 'alpha' &&
+            $function !== 'Alpha' &&
+            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
+            ) return false;
+        $cursor = $function_length + 1;
+        // Without an opening parenthesis (or with nothing after it) the
+        // cursor lies at or past the end of the string; do not hand an
+        // out-of-range offset to strcspn(), just treat it as no parameters.
+        if ($cursor < strlen($value)) {
+            $parameters_length = strcspn($value, ')', $cursor);
+            $parameters = substr($value, $cursor, $parameters_length);
+        } else {
+            $parameters = '';
+        }
+        $ret_params = array();
+        $lookup = array();
+        foreach (explode(',', $parameters) as $param) {
+            $pair = explode('=', $param);
+            // a parameter without '=' has no value part; skip it instead of
+            // reading an undefined offset
+            if (count($pair) < 2) continue;
+            $key = trim($pair[0]);
+            // separate local so the $value argument is not overwritten
+            $param_value = trim($pair[1]);
+            if (isset($lookup[$key])) continue;
+            if ($key !== 'opacity') continue;
+            $param_value = $this->intValidator->validate($param_value, $config, $context);
+            if ($param_value === false) continue;
+            $int = (int) $param_value;
+            if ($int > 100) $param_value = '100';
+            if ($int < 0) $param_value = '0';
+            $ret_params[] = "$key=$param_value";
+            $lookup[$key] = true;
+        }
+        $ret_parameters = implode(',', $ret_params);
+        $ret_function = "$function($ret_parameters)";
+        return $ret_function;
+    }
+
+}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates shorthand CSS property font.
*/
* CSSDefinition, this wouldn't be necessary. We'd instantiate
* our own copies.
*/
- var $info = array();
+ protected $info = array();
- function HTMLPurifier_AttrDef_CSS_Font($config) {
+ public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
$this->info['font-variant'] = $def->info['font-variant'];
$this->info['font-family'] = $def->info['font-family'];
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
static $system_fonts = array(
'caption' => true,
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
-// whitelisting allowed fonts would be nice
-
/**
* Validates a font family list according to CSS spec
+ * @todo whitelisting allowed fonts would be nice
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
'sans-serif' => true,
--- /dev/null
+<?php
+
+/**
+ * Decorator which enables !important to be used in CSS values.
+ *
+ * A trailing "!important" marker is stripped from the value, the remainder
+ * is handed to the wrapped definition, and the marker is re-appended only
+ * when this decorator was constructed to allow it.
+ */
+class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
+{
+    /** Wrapped definition that validates the value without its marker. */
+    protected $def;
+
+    /** Whether a stripped !important marker is restored on output. */
+    protected $allow;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $allow Whether or not to allow !important
+     */
+    public function __construct($def, $allow = false) {
+        $this->def = $def;
+        $this->allow = $allow;
+    }
+
+    /**
+     * Intercepts and removes !important if necessary
+     * @param $string Raw CSS value, possibly ending in "!important"
+     * @param $config Passed through unchanged to the wrapped definition
+     * @param $context Passed through unchanged to the wrapped definition
+     * @return Whatever the wrapped definition returns, with " !important"
+     *         appended when the marker was present and is allowed; false
+     *         when the wrapped definition returned false.
+     */
+    public function validate($string, $config, $context) {
+        // test for ! and important tokens
+        $string = trim($string);
+        $is_important = false;
+        // :TODO: optimization: test directly for !important and ! important
+        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
+            $temp = rtrim(substr($string, 0, -9));
+            // use a temp, because we might want to restore important
+            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
+                $string = rtrim(substr($temp, 0, -1));
+                $is_important = true;
+            }
+        }
+        $result = $this->def->validate($string, $config, $context);
+        // A rejected value must stay rejected: concatenating onto false
+        // would produce the bogus, truthy string ' !important'.
+        if ($result === false) return false;
+        if ($this->allow && $is_important) $result .= ' !important';
+        return $result;
+    }
+}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
-
/**
* Represents a Length as defined by CSS.
*/
* @warning The code assumes all units are two characters long. Be careful
* if we have to change this behavior!
*/
- var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
+ protected $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
/**
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation to
*/
- var $number_def;
+ protected $number_def;
/**
* @param $non_negative Bool indication whether or not negative values are
* allowed.
*/
- function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
+ public function __construct($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
}
- function validate($length, $config, &$context) {
+ public function validate($length, $config, $context) {
$length = $this->parseCDATA($length);
if ($length === '') return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates shorthand CSS property list-style.
* @warning Does not support url tokens that have internal spaces.
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
*/
- var $info;
+ protected $info;
- function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
+ public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['list-style-type'] = $def->info['list-style-type'];
$this->info['list-style-position'] = $def->info['list-style-position'];
$this->info['list-style-image'] = $def->info['list-style-image'];
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Framework class for strings that involve multiple values.
*
/**
* Instance of component definition to defer validation to.
+ * @todo Make protected
*/
- var $single;
+ public $single;
/**
* Max number of values allowed.
+ * @todo Make protected
*/
- var $max;
+ public $max;
/**
* @param $single HTMLPurifier_AttrDef to multiply
* @param $max Max number of values allowed (usually four)
*/
- function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
+ public function __construct($single, $max = 4) {
$this->single = $single;
$this->max = $max;
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
if ($string === '') return false;
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
/**
* Bool indicating whether or not only positive values allowed.
*/
- var $non_negative = false;
+ protected $non_negative = false;
/**
* @param $non_negative Bool indicating whether negatives are forbidden
*/
- function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
+ public function __construct($non_negative = false) {
$this->non_negative = $non_negative;
}
- function validate($number, $config, &$context) {
+ public function validate($number, $config, $context) {
$number = $this->parseCDATA($number);
if ($number === '') return false;
+ if ($number === '0') return '0';
$sign = '';
switch ($number[0]) {
$number = ltrim($number, '0');
return $number ? $sign . $number : '0';
}
- if (!strpos($number, '.')) return false;
+
+ // Period is the only non-numeric character allowed
+ if (strpos($number, '.') === false) return false;
list($left, $right) = explode('.', $number, 2);
- if (!ctype_digit($left)) return false;
- $left = ltrim($left, '0');
+ if ($left === '' && $right === '') return false;
+ if ($left !== '' && !ctype_digit($left)) return false;
+ $left = ltrim($left, '0');
$right = rtrim($right, '0');
if ($right === '') {
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
-
/**
* Validates a Percentage as defined by the CSS spec.
*/
/**
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
*/
- var $number_def;
+ protected $number_def;
/**
* @param $non_negative Bool indicating whether to forbid negative values
*/
- function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
+ public function __construct($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates the value for the CSS property text-decoration
* @note This class could be generalized into a version that acts sort of
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
static $allowed_values = array(
'line-through' => true,
<?php
-require_once 'HTMLPurifier/AttrDef/URI.php';
-
/**
* Validates a URI in CSS syntax, which uses url('http://example.com')
* @note While theoretically speaking a URI in a CSS document could
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
- function HTMLPurifier_AttrDef_CSS_URI() {
- parent::HTMLPurifier_AttrDef_URI(true); // always embedded
+ public function __construct() {
+ parent::__construct(true); // always embedded
}
- function validate($uri_string, $config, &$context) {
+ public function validate($uri_string, $config, $context) {
// parse the URI out of the string and then pass it onto
// the parent object
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
// Enum = Enumerated
/**
* Validates a keyword against a list of valid values.
/**
* Lookup table of valid values.
+ * @todo Make protected
*/
- var $valid_values = array();
+ public $valid_values = array();
/**
* Bool indicating whether or not enumeration is case sensitive.
* @note In general this is always case insensitive.
*/
- var $case_sensitive = false; // values according to W3C spec
+ protected $case_sensitive = false; // values according to W3C spec
/**
* @param $valid_values List of valid values
* @param $case_sensitive Bool indicating whether or not case sensitive
*/
- function HTMLPurifier_AttrDef_Enum(
+ public function __construct(
$valid_values = array(), $case_sensitive = false
) {
$this->valid_values = array_flip($valid_values);
$this->case_sensitive = $case_sensitive;
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = trim($string);
if (!$this->case_sensitive) {
// we may want to do full case-insensitive libraries
* valid values. Example: "foo,bar,baz". Prepend "s:" to make
* case sensitive
*/
- function make($string) {
+ public function make($string) {
if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
$string = substr($string, 2);
$sensitive = true;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates a boolean attribute
*/
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{
- var $name;
- var $minimized = true;
+ protected $name;
+ public $minimized = true;
- function HTMLPurifier_AttrDef_HTML_Bool($name = false) {$this->name = $name;}
+ public function __construct($name = false) {$this->name = $name;}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
if (empty($string)) return false;
return $this->name;
}
/**
* @param $string Name of attribute
*/
- function make($string) {
+ public function make($string) {
return new HTMLPurifier_AttrDef_HTML_Bool($string);
}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; // for %Core.ColorKeywords
-
/**
* Validates a color according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
<?php
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'AllowedFrameTargets', array(), 'lookup',
- 'Lookup table of all allowed link frame targets. Some commonly used '.
- 'link targets include _blank, _self, _parent and _top. Values should '.
- 'be lowercase, as validation will be done in a case-sensitive manner '.
- 'despite W3C\'s recommendation. XHTML 1.0 Strict does not permit '.
- 'the target attribute so this directive will have no effect in that '.
- 'doctype. XHTML 1.1 does not enable the Target module by default, you '.
- 'will have to manually enable it (see the module documentation for more details.)'
-);
-
-require_once 'HTMLPurifier/AttrDef/Enum.php';
-
/**
* Special-case enum attribute definition that lazy loads allowed frame targets
*/
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{
- var $valid_values = false; // uninitialized value
- var $case_sensitive = false;
+ public $valid_values = false; // uninitialized value
+ protected $case_sensitive = false;
- function HTMLPurifier_AttrDef_HTML_FrameTarget() {}
+ public function __construct() {}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
return parent::validate($string, $config, $context);
}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/IDAccumulator.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'EnableID', false, 'bool',
- 'Allows the ID attribute in HTML. This is disabled by default '.
- 'due to the fact that without proper configuration user input can '.
- 'easily break the validation of a webpage by specifying an ID that is '.
- 'already on the surrounding HTML. If you don\'t mind throwing caution to '.
- 'the wind, enable this directive, but I strongly recommend you also '.
- 'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
- 'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
- 'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
- 'versions.'
-);
-HTMLPurifier_ConfigSchema::defineAlias(
- 'HTML', 'EnableAttrID', 'Attr', 'EnableID'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'IDPrefix', '', 'string',
- 'String to prefix to IDs. If you have no idea what IDs your pages '.
- 'may use, you may opt to simply add a prefix to all user-submitted ID '.
- 'attributes so that they are still usable, but will not conflict with '.
- 'core page IDs. Example: setting the directive to \'user_\' will result in '.
- 'a user submitted \'foo\' to become \'user_foo\' Be sure to set '.
- '%HTML.EnableAttrID to true before using '.
- 'this. This directive was available since 1.2.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'IDPrefixLocal', '', 'string',
- 'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
- 'you need to allow multiple sets of '.
- 'user content on web page, you may need to have a seperate prefix that '.
- 'changes with each iteration. This way, seperately submitted user content '.
- 'displayed on the same page doesn\'t clobber each other. Ideal values '.
- 'are unique identifiers for the content it represents (i.e. the id of '.
- 'the row in the database). Be sure to add a seperator (like an underscore) '.
- 'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
- 'is set to a non-empty value! This directive was available since 1.2.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'IDBlacklistRegexp', null, 'string/null',
- 'PCRE regular expression to be matched against all IDs. If the expression '.
- 'is matches, the ID is rejected. Use this with care: may cause '.
- 'significant degradation. ID matching is done after all other '.
- 'validation. This directive was available since 1.6.0.'
-);
-
/**
* Validates the HTML attribute ID.
* @warning Even though this is the id processor, it
// ref functionality disabled, since we also have to verify
// whether or not the ID it refers to exists
- function validate($id, $config, &$context) {
+ public function validate($id, $config, $context) {
if (!$config->get('Attr', 'EnableID')) return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
-
/**
* Validates the HTML type length (not to be confused with CSS's length).
*
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'AllowedRel', array(), 'lookup',
- 'List of allowed forward document relationships in the rel attribute. '.
- 'Common values may be nofollow or print. By default, this is empty, '.
- 'meaning that no document relationships are allowed. This directive '.
- 'was available since 1.6.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'AllowedRev', array(), 'lookup',
- 'List of allowed reverse document relationships in the rev attribute. '.
- 'This attribute is a bit of an edge-case; if you don\'t know what it '.
- 'is for, stay away. This directive was available since 1.6.0.'
-);
-
/**
* Validates a rel/rev link attribute against a directive of allowed values
* @note We cannot use Enum because link types allow multiple
{
/** Name config attribute to pull. */
- var $name;
+ protected $name;
- function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
+ public function __construct($name) {
$configLookup = array(
'rel' => 'AllowedRel',
'rev' => 'AllowedRev'
$this->name = $configLookup[$name];
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$allowed = $config->get('Attr', $this->name);
if (empty($allowed)) return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
-
/**
* Validates a MultiLength as defined by the HTML spec.
*
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/Config.php';
-
/**
* Validates contents based on NMTOKENS attribute type.
* @note The only current use for this is the class attribute in HTML
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = trim($string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates an integer representation of pixels according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '0') return $string;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates an integer.
* @note While this class was modeled off the CSS definition, no currently
/**
* Bool indicating whether or not negative values are allowed
*/
- var $negative = true;
+ protected $negative = true;
/**
* Bool indicating whether or not zero is allowed
*/
- var $zero = true;
+ protected $zero = true;
/**
* Bool indicating whether or not positive values are allowed
*/
- var $positive = true;
+ protected $positive = true;
/**
* @param $negative Bool indicating whether or not negative values are allowed
* @param $zero Bool indicating whether or not zero is allowed
* @param $positive Bool indicating whether or not positive values are allowed
*/
- function HTMLPurifier_AttrDef_Integer(
+ public function __construct(
$negative = true, $zero = true, $positive = true
) {
$this->negative = $negative;
$this->positive = $positive;
}
- function validate($integer, $config, &$context) {
+ public function validate($integer, $config, $context) {
$integer = $this->parseCDATA($integer);
if ($integer === '') return false;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates the HTML attribute lang, effectively a language code.
* @note Built according to RFC 3066, which obsoleted RFC 1766
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
// moodle change - we use special lang strings unfortunately
return ereg_replace('[^0-9a-zA-Z_-]', '', $string);
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates arbitrary text according to the HTML spec.
*/
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
return $this->parseCDATA($string);
}
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/URIParser.php';
-require_once 'HTMLPurifier/URIScheme.php';
-require_once 'HTMLPurifier/URISchemeRegistry.php';
-require_once 'HTMLPurifier/AttrDef/URI/Host.php';
-require_once 'HTMLPurifier/PercentEncoder.php';
-require_once 'HTMLPurifier/AttrDef/URI/Email.php';
-
-// special case filtering directives
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Munge', null, 'string/null', '
-<p>
- Munges all browsable (usually http, https and ftp)
- absolute URI\'s into another URI, usually a URI redirection service.
- This directive accepts a URI, formatted with a <code>%s</code> where
- the url-encoded original URI should be inserted (sample:
- <code>http://www.google.com/url?q=%s</code>).
-</p>
-<p>
- Uses for this directive:
-</p>
-<ul>
- <li>
- Prevent PageRank leaks, while being fairly transparent
- to users (you may also want to add some client side JavaScript to
- override the text in the statusbar). <strong>Notice</strong>:
- Many security experts believe that this form of protection does not deter spam-bots.
- </li>
- <li>
- Redirect users to a splash page telling them they are leaving your
- website. While this is poor usability practice, it is often mandated
- in corporate environments.
- </li>
-</ul>
-<p>
- This directive has been available since 1.3.0.
-</p>
-');
-
-// disabling directives
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Disable', false, 'bool', '
-<p>
- Disables all URIs in all forms. Not sure why you\'d want to do that
- (after all, the Internet\'s founded on the notion of a hyperlink).
- This directive has been available since 1.3.0.
-</p>
-');
-HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableResources', false, 'bool', '
-<p>
- Disables embedding resources, essentially meaning no pictures. You can
- still link to them though. See %URI.DisableExternalResources for why
- this might be a good idea. This directive has been available since 1.3.0.
-</p>
-');
-
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
- var $parser;
- var $embedsResource;
+ protected $parser;
+ protected $embedsResource;
/**
* @param $embeds_resource Does the URI here result in an extra HTTP request?
*/
- function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
+ public function __construct($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->embedsResource = (bool) $embeds_resource;
}
- function validate($uri, $config, &$context) {
+ public function validate($uri, $config, $context) {
if ($config->get('URI', 'Disable')) return false;
if (!$result) break;
// chained filtering
- $uri_def =& $config->getDefinition('URI');
+ $uri_def = $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
-class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
+abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{
/**
}
// sub-implementations
-//moodlefix require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
--- /dev/null
+<?php
+
+/**
+ * Primitive email validation class based on the regexp found at
+ * http://www.regular-expressions.info/email.html
+ */
+class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
+{
+
+    /**
+     * Checks that the string looks like a bare address (local@domain.tld).
+     * @param $string Candidate email address
+     * @param $config Not used by this check
+     * @param $context Not used by this check
+     * @return The trimmed address when it matches the pattern, false otherwise
+     */
+    public function validate($string, $config, $context) {
+        // no support for named mailboxes i.e. "Bob <bob@example.com>"
+        // that needs more percent encoding to be done
+        if ($string == '') return false;
+        $string = trim($string);
+        // The final label accepts 2-63 letters (63 is the DNS label limit);
+        // a cap of 4 rejected real TLDs such as .museum and .travel.
+        // Punycode (xn--) TLDs are still not matched by this simple check.
+        $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/i', $string);
+        return $result ? $string : false;
+    }
+
+}
+
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
-require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
-
/**
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
*/
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
*/
- var $ipv4;
+ protected $ipv4;
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
*/
- var $ipv6;
+ protected $ipv6;
- function HTMLPurifier_AttrDef_URI_Host() {
+ public function __construct() {
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
}
- function validate($string, $config, &$context) {
+ public function validate($string, $config, $context) {
$length = strlen($string);
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
<?php
-require_once 'HTMLPurifier/AttrDef.php';
-
/**
* Validates an IPv4 address
* @author Feyd @ forums.devnetwork.net (public domain)
/**
* IPv4 regex, protected so that IPv6 can reuse it
- * @protected
*/
- var $ip4;
+ protected $ip4;
- function validate($aIP, $config, &$context) {
+ public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
* Lazy load function to prevent regex from being stuffed in
* cache.
*/
- function _loadRegex() {
+ protected function _loadRegex() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
<?php
-require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
-
/**
* Validates an IPv6 address.
* @author Feyd @ forums.devnetwork.net (public domain)
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{
- function validate($aIP, $config, &$context) {
+ public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
* more details.
*/
-class HTMLPurifier_AttrTransform
+abstract class HTMLPurifier_AttrTransform
{
/**
* @param $context Mandatory HTMLPurifier_Context object
* @returns Processed attribute array.
*/
- function transform($attr, $config, &$context) {
- trigger_error('Cannot call abstract function', E_USER_ERROR);
- }
+ abstract public function transform($attr, $config, $context);
/**
* Prepends CSS properties to the style attribute, creating the
* @param $attr Attribute array to process (passed by reference)
* @param $css CSS to prepend
*/
- function prependCSS(&$attr, $css) {
+ public function prependCSS(&$attr, $css) {
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
$attr['style'] = $css . $attr['style'];
}
* @param $attr Attribute array to process (passed by reference)
* @param $key Key of attribute to confiscate
*/
- function confiscateAttr(&$attr, $key) {
+ public function confiscateAttr(&$attr, $key) {
if (!isset($attr[$key])) return null;
$value = $attr[$key];
unset($attr[$key]);
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
// this MUST be placed in post, as it assumes that any value in dir is valid
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DefaultTextDir', 'ltr', 'string',
- 'Defines the default text direction (ltr or rtl) of the document '.
- 'being parsed. This generally is the same as the value of the dir '.
- 'attribute in HTML, or ltr if that is not specified.'
-);
-HTMLPurifier_ConfigSchema::defineAllowedValues(
- 'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
-);
-
/**
* Post-transform that ensures that bdo tags have the dir attribute set.
*/
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (isset($attr['dir'])) return $attr;
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
return $attr;
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Pre-transform that changes deprecated bgcolor attribute to CSS.
*/
-class HTMLPurifier_AttrTransform_BgColor
-extends HTMLPurifier_AttrTransform {
+class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr['bgcolor'])) return $attr;
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Pre-transform that converts a boolean attribute to fixed CSS
*/
-class HTMLPurifier_AttrTransform_BoolToCSS
-extends HTMLPurifier_AttrTransform {
+class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform {
/**
* Name of boolean attribute that is trigger
*/
- var $attr;
+ protected $attr;
/**
* CSS declarations to add to style, needs trailing semicolon
*/
- var $css;
+ protected $css;
/**
* @param $attr string attribute name to convert from
* @param $css string CSS declarations to add to style (needs semicolon)
*/
- function HTMLPurifier_AttrTransform_BoolToCSS($attr, $css) {
+ public function __construct($attr, $css) {
$this->attr = $attr;
$this->css = $css;
}
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
unset($attr[$this->attr]);
$this->prependCSS($attr, $this->css);
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Pre-transform that changes deprecated border attribute to CSS.
*/
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr['border'])) return $attr;
$border_width = $this->confiscateAttr($attr, 'border');
// some validation should happen here
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Generic pre-transform that converts an attribute with a fixed number of
* values (enumerated) to CSS.
/**
* Name of attribute to transform from
*/
- var $attr;
+ protected $attr;
/**
* Lookup array of attribute values to CSS
*/
- var $enumToCSS = array();
+ protected $enumToCSS = array();
/**
* Case sensitivity of the matching
* @warning Currently can only be guaranteed to work with ASCII
* values.
*/
- var $caseSensitive = false;
+ protected $caseSensitive = false;
/**
* @param $attr String attribute name to transform from
* @param $enum_to_css Lookup array of attribute values to CSS
* @param $case_sensitive Boolean case sensitivity indicator, default false
*/
- function HTMLPurifier_AttrTransform_EnumToCSS($attr, $enum_to_css, $case_sensitive = false) {
+ public function __construct($attr, $enum_to_css, $case_sensitive = false) {
$this->attr = $attr;
$this->enumToCSS = $enum_to_css;
$this->caseSensitive = (bool) $case_sensitive;
}
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
// must be called POST validation
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DefaultInvalidImage', '', 'string',
- 'This is the default image an img tag will be pointed to if it does '.
- 'not have a valid src attribute. In future versions, we may allow the '.
- 'image tag to be removed completely, but due to design issues, this is '.
- 'not possible right now.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
- 'This is the content of the alt tag of an invalid image if the user '.
- 'had not previously specified an alt attribute. It has no effect when the '.
- 'image is valid but there was no alt attribute present.'
-);
-
/**
* Transform that supplies default values for the src and alt attributes
* in img tags, as well as prevents the img tag from being removed
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
$src = true;
if (!isset($attr['src'])) {
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Pre-transform that changes deprecated hspace and vspace attributes to CSS
*/
-class HTMLPurifier_AttrTransform_ImgSpace
-extends HTMLPurifier_AttrTransform {
+class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {
- var $attr;
- var $css = array(
+ protected $attr;
+ protected $css = array(
'hspace' => array('left', 'right'),
'vspace' => array('top', 'bottom')
);
- function HTMLPurifier_AttrTransform_ImgSpace($attr) {
+ public function __construct($attr) {
$this->attr = $attr;
if (!isset($this->css[$attr])) {
trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
}
}
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Post-transform that copies lang's value to xml:lang (and vice-versa)
* @note Theoretically speaking, this could be a pre-transform, but putting
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
$lang = isset($attr['lang']) ? $attr['lang'] : false;
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Class for handling width/height length attribute transformations to CSS
*/
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{
- var $name;
- var $cssName;
+ protected $name;
+ protected $cssName;
- function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
+ public function __construct($name, $css_name = null) {
$this->name = $name;
$this->cssName = $css_name ? $css_name : $name;
}
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr[$this->name])) return $attr;
$length = $this->confiscateAttr($attr, $this->name);
if(ctype_digit($length)) $length .= 'px';
<?php
-require_once 'HTMLPurifier/AttrTransform.php';
-
/**
* Pre-transform that changes deprecated name attribute to ID if necessary
*/
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{
- function transform($attr, $config, &$context) {
+ public function transform($attr, $config, $context) {
if (!isset($attr['name'])) return $attr;
$id = $this->confiscateAttr($attr, 'name');
if ( isset($attr['id'])) return $attr;
--- /dev/null
+<?php
+
+/**
+ * Attribute transform for <script>: guarantees that a type attribute is
+ * present, defaulting it to text/javascript when none was set.
+ */
+class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
+{
+    /**
+     * @param $attr Assoc array of attributes for the element
+     * @param $config Configuration object (not consulted by this transform)
+     * @param $context Context object (not consulted by this transform)
+     * @return Assoc array of attributes, always carrying a 'type' entry
+     */
+    public function transform($attr, $config, $context) {
+        // an already-set type is left exactly as provided
+        if (isset($attr['type'])) return $attr;
+        $attr['type'] = 'text/javascript';
+        return $attr;
+    }
+}
<?php
-require_once 'HTMLPurifier/AttrDef/Lang.php';
-require_once 'HTMLPurifier/AttrDef/Enum.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
-require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
-require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
-require_once 'HTMLPurifier/AttrDef/HTML/Color.php';
-require_once 'HTMLPurifier/AttrDef/Integer.php';
-require_once 'HTMLPurifier/AttrDef/Text.php';
-require_once 'HTMLPurifier/AttrDef/URI.php';
-
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
*/
{
/**
* Lookup array of attribute string identifiers to concrete implementations
- * @protected
*/
- var $info = array();
+ protected $info = array();
/**
* Constructs the info array, supplying default implementations for attribute
* types.
*/
- function HTMLPurifier_AttrTypes() {
+ public function __construct() {
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
* @param $type String type name
* @return Object AttrDef for type
*/
- function get($type) {
+ public function get($type) {
// determine if there is any extra info tacked on
if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2);
* @param $type String type name
* @param $impl Object AttrDef for type
*/
- function set($type, $impl) {
+ public function set($type, $impl) {
$this->info[$type] = $impl;
}
}
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
- function validateToken(&$token, &$config, &$context) {
+ public function validateToken(&$token, &$config, $context) {
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
- if ($token->type !== 'start' && $token->type !== 'empty') return $token;
+ if (
+ !$token instanceof HTMLPurifier_Token_Start &&
+ !$token instanceof HTMLPurifier_Token_Empty
+ ) return $token;
// create alias to global definition array, see also $defs
// DEFINITION CALL
--- /dev/null
+<?php
+
+// constants are slow, so we use as few as possible
+// HTMLPURIFIER_PREFIX is the directory one level above this file; it is only
+// defined here when the including application has not defined it already.
+if (!defined('HTMLPURIFIER_PREFIX')) {
+    define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));
+}
+
+// accommodations for versions earlier than 5.0.2
+// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
+// The fallback mirrors PHP's native PHP_EOL: "\r\n" on Windows and "\n" on
+// every other platform. Darwin (Mac OS X) uses "\n" as well; "\r" was the
+// line ending of classic Mac OS only.
+if (!defined('PHP_EOL')) {
+    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
+        define('PHP_EOL', "\r\n");
+    } else {
+        define('PHP_EOL', "\n");
+    }
+}
+
+/**
+ * Bootstrap class that contains meta-functionality for HTML Purifier such as
+ * the autoload function.
+ *
+ * @note
+ *      This class may be used without any other files from HTML Purifier.
+ */
+class HTMLPurifier_Bootstrap
+{
+
+    /**
+     * Autoload function for HTML Purifier
+     * @param $class Class to load
+     * @return bool True if a file for the class was found and included,
+     *         false if getPath() could not map the class to a file
+     */
+    public static function autoload($class) {
+        $file = HTMLPurifier_Bootstrap::getPath($class);
+        if (!$file) return false;
+        require HTMLPURIFIER_PREFIX . '/' . $file;
+        return true;
+    }
+
+    /**
+     * Returns the path for a specific class.
+     * @param $class Class name to resolve
+     * @return Path relative to HTMLPURIFIER_PREFIX, or false when the name
+     *         does not start with 'HTMLPurifier' or the file does not exist
+     */
+    public static function getPath($class) {
+        if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
+        // Custom implementations
+        if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
+            // language classes live in one directory and are named by
+            // language code, with underscores turned into hyphens
+            $code = str_replace('_', '-', substr($class, 22));
+            $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
+        } else {
+            // default mapping: underscores in the class name become
+            // directory separators
+            $file = str_replace('_', '/', $class) . '.php';
+        }
+        if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
+        return $file;
+    }
+
+    /**
+     * "Pre-registers" our autoloader on the SPL stack, i.e. places it in
+     * front of any autoloaders that were registered earlier.
+     * @throws Exception On PHP versions before 5.3.0 only, when one of the
+     *         already-registered autoloaders is a non-static method
+     *         callback (see PHP Bug #44144)
+     */
+    public static function registerAutoload() {
+        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
+        if ( ($funcs = spl_autoload_functions()) === false ) {
+            spl_autoload_register($autoload);
+        } elseif (function_exists('spl_autoload_unregister')) {
+            if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
+                // The prepend flag (third argument) exists since PHP 5.3.0:
+                // it puts our loader first without unregistering and
+                // re-registering the others, so object-method and closure
+                // autoloaders are left untouched and nothing needs to throw.
+                spl_autoload_register($autoload, true, true);
+                return;
+            }
+            // Legacy path: empty the stack, register ourselves, then put
+            // the previous autoloaders back in their original order.
+            $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
+                      version_compare(PHP_VERSION, '5.1.0', '>=');
+            foreach ($funcs as $func) {
+                if (is_array($func)) {
+                    // :TRICKY: There are some compatibility issues and some
+                    // places where we need to error out
+                    $reflector = new ReflectionMethod($func[0], $func[1]);
+                    if (!$reflector->isStatic()) {
+                        throw new Exception('
+ HTML Purifier autoloader registrar is not compatible
+ with non-static object methods due to PHP Bug #44144;
+ Please do not use HTMLPurifier.autoload.php (or any
+ file that includes this file); instead, place the code:
+ spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
+ after your own autoloaders.
+ ');
+                    }
+                    // Surprisingly, spl_autoload_register supports the
+                    // Class::staticMethod callback format, although call_user_func doesn't
+                    if ($compat) $func = implode('::', $func);
+                }
+                spl_autoload_unregister($func);
+            }
+            spl_autoload_register($autoload);
+            foreach ($funcs as $func) spl_autoload_register($func);
+        }
+    }
+
+}
<?php
-require_once 'HTMLPurifier/Definition.php';
-
-require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
-require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
-require_once 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
-require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
-require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
-require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
-require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
-require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
-require_once 'HTMLPurifier/AttrDef/Enum.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'CSS', 'DefinitionRev', 1, 'int', '
-<p>
- Revision identifier for your custom definition. See
- %HTML.DefinitionRev for details. This directive has been available
- since 2.0.0.
-</p>
-');
-
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
- var $type = 'CSS';
+ public $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
*/
- var $info = array();
+ public $info = array();
/**
* Constructs the info array. The meat of this class.
*/
- function doSetup($config) {
+ protected function doSetup($config) {
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'), false);
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
+ if ($config->get('CSS', 'Proprietary')) {
+ $this->doSetupProprietary($config);
+ }
+
+ if ($config->get('CSS', 'AllowTricky')) {
+ $this->doSetupTricky($config);
+ }
+
+ $allow_important = $config->get('CSS', 'AllowImportant');
+ // wrap all attr-defs with decorator that handles !important
+ foreach ($this->info as $k => $v) {
+ $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
+ }
+
+ $this->setupConfigStuff($config);
+ }
+
+    /**
+     * Registers the additional properties enabled by %CSS.Proprietary.
+     * @param $config Configuration object (not consulted by this method)
+     */
+    protected function doSetupProprietary($config) {
+        // Internet Explorer only scrollbar colors
+        $scrollbar_parts = array(
+            'arrow', 'base', 'darkshadow', 'face', 'highlight', 'shadow'
+        );
+        foreach ($scrollbar_parts as $part) {
+            $this->info["scrollbar-$part-color"] = new HTMLPurifier_AttrDef_CSS_Color();
+        }
+
+        // opacity is CSS3 rather than strictly proprietary; it is grouped
+        // here together with its vendor-prefixed spellings
+        foreach (array('opacity', '-moz-opacity', '-khtml-opacity') as $property) {
+            $this->info[$property] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+        }
+
+        // filter: only opacity, for now
+        $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
+
}
+    /**
+     * Registers the display and visibility properties enabled by
+     * %CSS.AllowTricky.
+     * @param $config Configuration object (not consulted by this method)
+     */
+    protected function doSetupTricky($config) {
+        $display_values = array(
+            'inline', 'block', 'list-item', 'run-in', 'compact',
+            'marker', 'table', 'inline-table', 'table-row-group',
+            'table-header-group', 'table-footer-group', 'table-row',
+            'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
+        );
+        $visibility_values = array('visible', 'hidden', 'collapse');
+
+        $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_values);
+        $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum($visibility_values);
+    }
+
+
+    /**
+     * Applies %CSS.AllowedProperties: when it is set, every property in
+     * $this->info that it does not list is removed, and a warning is
+     * raised for every listed property that $this->info does not contain.
+     * Based off of HTMLPurifier_HTMLDefinition.
+     * @param $config Configuration object to read the directive from
+     * @todo Refactor duplicate elements into common class (probably using
+     *       composition, not inheritance).
+     */
+    protected function setupConfigStuff($config) {
+
+        $allowed_attributes = $config->get('CSS', 'AllowedProperties');
+        // null means no restriction was configured: keep everything
+        if ($allowed_attributes === null) return;
+
+        $support = "(for information on implementing this, see the ".
+                   "support forums) ";
+
+        // drop properties that were not requested, ticking off every
+        // requested name that we do know about
+        foreach (array_keys($this->info) as $name) {
+            $is_allowed = isset($allowed_attributes[$name]);
+            unset($allowed_attributes[$name]);
+            if (!$is_allowed) unset($this->info[$name]);
+        }
+
+        // whatever is left was requested but is not a known property
+        foreach (array_keys($allowed_attributes) as $name) {
+            // :TODO: Is this htmlspecialchars() call really necessary?
+            $escaped = htmlspecialchars($name);
+            trigger_error("Style attribute '$escaped' is not supported $support", E_USER_WARNING);
+        }
+
+    }
}
<?php
-// HTMLPurifier_ChildDef and inheritance have three types of output:
-// true = leave nodes as is
-// false = delete parent node and all children
-// array(...) = replace children nodes with these
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'EscapeInvalidChildren', false, 'bool',
- 'When true, a child is found that is not allowed in the context of the '.
- 'parent element will be transformed into text as if it were ASCII. When '.
- 'false, that element and all internal tags will be dropped, though text '.
- 'will be preserved. There is no option for dropping the element but '.
- 'preserving child nodes.'
-);
-
/**
* Defines allowed child nodes and validates tokens against it.
*/
-class HTMLPurifier_ChildDef
+abstract class HTMLPurifier_ChildDef
{
/**
* Type of child definition, usually right-most part of class name lowercase.
* Used occasionally in terms of context.
- * @public
*/
- var $type;
+ public $type;
/**
* Bool that indicates whether or not an empty array of children is okay
*
* This is necessary for redundant checking when changes affecting
* a child node may cause a parent node to now be disallowed.
- *
- * @public
*/
- var $allow_empty;
+ public $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow
*/
- var $elements = array();
+ public $elements = array();
/**
* Validates nodes according to definition and returns modification.
*
- * @public
* @param $tokens_of_children Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object
* @return bool false to remove parent node
* @return array of replacement child tokens
*/
- function validateChildren($tokens_of_children, $config, &$context) {
- trigger_error('Call to abstract function', E_USER_ERROR);
- }
+ abstract public function validateChildren($tokens_of_children, $config, $context);
}
<?php
-require_once 'HTMLPurifier/ChildDef.php';
-
/**
* Definition that uses different definitions depending on context.
*
/**
* Instance of the definition object to use when inline. Usually stricter.
- * @public
*/
- var $inline;
+ public $inline;
/**
* Instance of the definition object to use when block.
- * @public
*/
- var $block;
+ public $block;
- var $type = 'chameleon';
+ public $type = 'chameleon';
/**
* @param $inline List of elements to allow when inline.
* @param $block List of elements to allow when block.
*/
- function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
+ public function __construct($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
$this->elements = $this->block->elements;
}
- function validateChildren($tokens_of_children, $config, &$context) {
+ public function validateChildren($tokens_of_children, $config, $context) {
if ($context->get('IsInline') === false) {
return $this->block->validateChildren(
$tokens_of_children, $config, $context);
<?php
-require_once 'HTMLPurifier/ChildDef.php';
-
/**
* Custom validation class, accepts DTD child definitions
*
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
- var $type = 'custom';
- var $allow_empty = false;
+ public $type = 'custom';
+ public $allow_empty = false;
/**
* Allowed child pattern as defined by the DTD
*/
- var $dtd_regex;
+ public $dtd_regex;
/**
* PCRE regex derived from $dtd_regex
* @private
*/
- var $_pcre_regex;
+ private $_pcre_regex;
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
- function HTMLPurifier_ChildDef_Custom($dtd_regex) {
+ public function __construct($dtd_regex) {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
- function _compileRegex() {
+ protected function _compileRegex() {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
$this->_pcre_regex = $reg;
}
- function validateChildren($tokens_of_children, $config, &$context) {
+ public function validateChildren($tokens_of_children, $config, $context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0); // direct
- if ($token->type == 'start') {
+ if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
- } elseif ($token->type == 'end') {
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
<?php
-require_once 'HTMLPurifier/ChildDef.php';
-
/**
* Definition that disallows all elements.
* @warning validateChildren() in this class is actually never called, because
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
- var $allow_empty = true;
- var $type = 'empty';
- function HTMLPurifier_ChildDef_Empty() {}
- function validateChildren($tokens_of_children, $config, &$context) {
+ public $allow_empty = true;
+ public $type = 'empty';
+ public function __construct() {}
+ public function validateChildren($tokens_of_children, $config, $context) {
return array();
}
}
<?php
-require_once 'HTMLPurifier/ChildDef/Required.php';
-
/**
* Definition that allows a set of elements, and allows no children.
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
- var $allow_empty = true;
- var $type = 'optional';
- function validateChildren($tokens_of_children, $config, &$context) {
+ public $allow_empty = true;
+ public $type = 'optional';
+ public function validateChildren($tokens_of_children, $config, $context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) {
if (empty($tokens_of_children)) return true;
<?php
-require_once 'HTMLPurifier/ChildDef.php';
-
/**
* Definition that allows a set of elements, but disallows empty children.
*/
* Lookup table of allowed elements.
* @public
*/
- var $elements = array();
+ public $elements = array();
/**
* @param $elements List of allowed element names (lowercase).
*/
- function HTMLPurifier_ChildDef_Required($elements) {
+ public function __construct($elements) {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
$this->elements = $elements;
}
- var $allow_empty = false;
- var $type = 'required';
- function validateChildren($tokens_of_children, $config, &$context) {
+ public $allow_empty = false;
+ public $type = 'required';
+ public function validateChildren($tokens_of_children, $config, $context) {
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// generator
static $gen = null;
if ($gen === null) {
- $gen = new HTMLPurifier_Generator();
+ $gen = new HTMLPurifier_Generator($config, $context);
}
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
- if ($token->type == 'start') {
+ if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
- } elseif ($token->type == 'end') {
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
- if ($pcdata_allowed && $token->type == 'text') {
+ if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
continue;
}
}
- if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
+ if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
<?php
-require_once 'HTMLPurifier/ChildDef/Required.php';
-
/**
* Takes the contents of blockquote when in strict and reformats for validation.
*/
-class HTMLPurifier_ChildDef_StrictBlockquote
-extends HTMLPurifier_ChildDef_Required
+class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
- var $real_elements;
- var $fake_elements;
- var $allow_empty = true;
- var $type = 'strictblockquote';
- var $init = false;
- function validateChildren($tokens_of_children, $config, &$context) {
+ protected $real_elements;
+ protected $fake_elements;
+ public $allow_empty = true;
+ public $type = 'strictblockquote';
+ protected $init = false;
+ public function validateChildren($tokens_of_children, $config, $context) {
$def = $config->getHTMLDefinition();
if (!$this->init) {
if (!$is_inline) {
if (!$depth) {
if (
- ($token->type == 'text' && !$token->is_whitespace) ||
- ($token->type != 'text' && !isset($this->elements[$token->name]))
+ ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
+ (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
} else {
if (!$depth) {
// starting tokens have been inline text / empty
- if ($token->type == 'start' || $token->type == 'empty') {
+ if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
if (isset($this->elements[$token->name])) {
// ended
$ret[] = $block_wrap_end;
}
}
$ret[] = $token;
- if ($token->type == 'start') $depth++;
- if ($token->type == 'end') $depth--;
+ if ($token instanceof HTMLPurifier_Token_Start) $depth++;
+ if ($token instanceof HTMLPurifier_Token_End) $depth--;
}
if ($is_inline) $ret[] = $block_wrap_end;
return $ret;
<?php
-require_once 'HTMLPurifier/ChildDef.php';
-
/**
* Definition for tables
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
- var $allow_empty = false;
- var $type = 'table';
- var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
+ public $allow_empty = false;
+ public $type = 'table';
+ public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
- function HTMLPurifier_ChildDef_Table() {}
- function validateChildren($tokens_of_children, $config, &$context) {
+ public function __construct() {}
+ public function validateChildren($tokens_of_children, $config, $context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing
if ($token === false) {
// terminating sequence started
- } elseif ($token->type == 'start') {
+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
- } elseif ($token->type == 'end') {
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
$collection[] = $token;
continue;
default:
- if ($token->type == 'text' && $token->is_whitespace) {
+ if ($token instanceof HTMLPurifier_Token_Text && $token->is_whitespace) {
$collection[] = $token;
$tag_index++;
}
<?php
-require_once 'HTMLPurifier/ConfigSchema.php';
-
-// member variables
-require_once 'HTMLPurifier/HTMLDefinition.php';
-require_once 'HTMLPurifier/CSSDefinition.php';
-require_once 'HTMLPurifier/URIDefinition.php';
-require_once 'HTMLPurifier/Doctype.php';
-require_once 'HTMLPurifier/DefinitionCacheFactory.php';
-
-// accomodations for versions earlier than 4.3.10 and 5.0.2
-// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
-if (!defined('PHP_EOL')) {
- switch (strtoupper(substr(PHP_OS, 0, 3))) {
- case 'WIN':
- define('PHP_EOL', "\r\n");
- break;
- case 'DAR':
- define('PHP_EOL', "\r");
- break;
- default:
- define('PHP_EOL', "\n");
- }
-}
-
/**
* Configuration object that triggers customizable behavior.
*
* because a configuration object should always be forwarded,
* otherwise, you run the risk of missing a parameter and then
* being stumped when a configuration directive doesn't work.
+ *
+ * @todo Reconsider some of the public member variables
*/
class HTMLPurifier_Config
{
/**
* HTML Purifier's version
*/
- var $version = '2.1.4';
+ public $version = '3.1.0';
/**
- * Two-level associative array of configuration directives
+ * Bool indicator whether or not to automatically finalize
+ * the object if a read operation is done
*/
- var $conf;
+ public $autoFinalize = true;
+
+ // protected member variables
/**
- * Reference HTMLPurifier_ConfigSchema for value checking
+ * Namespace indexed array of serials for specific namespaces (see
+ * getSerial() for more info).
*/
- var $def;
+ protected $serials = array();
/**
- * Indexed array of definitions
+ * Serial for entire configuration object
*/
- var $definitions;
+ protected $serial;
/**
- * Bool indicator whether or not config is finalized
+ * Two-level associative array of configuration directives
*/
- var $finalized = false;
+ protected $conf;
/**
- * Bool indicator whether or not to automatically finalize
- * the object if a read operation is done
+ * Parser for variables
*/
- var $autoFinalize = true;
+ protected $parser;
/**
- * Namespace indexed array of serials for specific namespaces (see
- * getSerial for more info).
+ * Reference HTMLPurifier_ConfigSchema for value checking
+ * @note This is public for introspective purposes. Please don't
+ * abuse!
*/
- var $serials = array();
+ public $def;
/**
- * Serial for entire configuration object
+ * Indexed array of definitions
+ */
+ protected $definitions;
+
+ /**
+ * Bool indicator whether or not config is finalized
*/
- var $serial;
+ protected $finalized = false;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
- function HTMLPurifier_Config(&$definition) {
+ public function __construct($definition) {
$this->conf = $definition->defaults; // set up, copy in defaults
$this->def = $definition; // keep a copy around for checking
+ $this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
* Convenience constructor that creates a config object based on a mixed var
- * @static
* @param mixed $config Variable that defines the state of the config
* object. Can be: a HTMLPurifier_Config() object,
* an array of directives based on loadArray(),
* or a string filename of an ini file.
+ * @param HTMLPurifier_ConfigSchema Schema object
* @return Configured HTMLPurifier_Config object
*/
- function create($config) {
- if (is_a($config, 'HTMLPurifier_Config')) {
+ public static function create($config, $schema = null) {
+ if ($config instanceof HTMLPurifier_Config) {
// pass-through
return $config;
}
- $ret = HTMLPurifier_Config::createDefault();
+ if (!$schema) {
+ $ret = HTMLPurifier_Config::createDefault();
+ } else {
+ $ret = new HTMLPurifier_Config($schema);
+ }
if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config);
return $ret;
/**
* Convenience constructor that creates a default configuration object.
- * @static
* @return Default HTMLPurifier_Config object.
*/
- function createDefault() {
- $definition =& HTMLPurifier_ConfigSchema::instance();
+ public static function createDefault() {
+ $definition = HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition);
return $config;
}
* @param $namespace String namespace
* @param $key String key
*/
- function get($namespace, $key, $from_alias = false) {
+ public function get($namespace, $key) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace][$key])) {
// can't add % due to SimpleTest bug
* Retreives an array of directives to values from a given namespace
* @param $namespace String namespace
*/
- function getBatch($namespace) {
+ public function getBatch($namespace) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace])) {
trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
* before processing!
* @param $namespace Namespace to get serial for
*/
- function getBatchSerial($namespace) {
+ public function getBatchSerial($namespace) {
if (empty($this->serials[$namespace])) {
$batch = $this->getBatch($namespace);
unset($batch['DefinitionRev']);
* Returns a md5 signature for the entire configuration object
* that uniquely identifies that particular configuration
*/
- function getSerial() {
+ public function getSerial() {
if (empty($this->serial)) {
$this->serial = md5(serialize($this->getAll()));
}
/**
* Retrieves all directives, organized by namespace
*/
- function getAll() {
+ public function getAll() {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
return $this->conf;
}
* @param $key String key
* @param $value Mixed value
*/
- function set($namespace, $key, $value, $from_alias = false) {
+ public function set($namespace, $key, $value, $from_alias = false) {
if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$namespace][$key])) {
trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
if ($this->def->info[$namespace][$key]->class == 'alias') {
if ($from_alias) {
trigger_error('Double-aliases not allowed, please fix '.
- 'ConfigSchema bug with' . "$namespace.$key");
+ 'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
+ return;
}
- $this->set($this->def->info[$namespace][$key]->namespace,
- $this->def->info[$namespace][$key]->name,
+ $this->set($new_ns = $this->def->info[$namespace][$key]->namespace,
+ $new_dir = $this->def->info[$namespace][$key]->name,
$value, true);
+ trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
+ return;
+ }
+ try {
+ $value = $this->parser->parse(
+ $value,
+ $type = $this->def->info[$namespace][$key]->type,
+ $this->def->info[$namespace][$key]->allow_null
+ );
+ } catch (HTMLPurifier_VarParserException $e) {
+ trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . $type, E_USER_WARNING);
return;
}
- $value = $this->def->validate(
- $value,
- $type = $this->def->info[$namespace][$key]->type,
- $this->def->info[$namespace][$key]->allow_null
- );
if (is_string($value)) {
// resolve value alias if defined
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
}
}
}
- if ($this->def->isError($value)) {
- trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . $type, E_USER_WARNING);
- return;
- }
$this->conf[$namespace][$key] = $value;
// reset definitions if the directives they depend on changed
/**
* Convenience function for error reporting
- * @private
*/
- function _listify($lookup) {
+ private function _listify($lookup) {
$list = array();
foreach ($lookup as $name => $b) $list[] = $name;
return implode(', ', $list);
}
/**
- * Retrieves reference to the HTML definition.
+ * Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
*/
- function &getHTMLDefinition($raw = false) {
- $def =& $this->getDefinition('HTML', $raw);
- return $def; // prevent PHP 4.4.0 from complaining
+ public function getHTMLDefinition($raw = false) {
+ return $this->getDefinition('HTML', $raw);
}
/**
- * Retrieves reference to the CSS definition
+ * Retrieves object reference to the CSS definition
+ * @param $raw Return a copy that has not been setup yet. Must be
+ * called before it's been setup, otherwise won't work.
*/
- function &getCSSDefinition($raw = false) {
- $def =& $this->getDefinition('CSS', $raw);
- return $def;
+ public function getCSSDefinition($raw = false) {
+ return $this->getDefinition('CSS', $raw);
}
/**
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
*/
- function &getDefinition($type, $raw = false) {
+ public function getDefinition($type, $raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
} else {
- trigger_error("Definition of $type type not supported");
- $false = false;
- return $false;
+ throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type, 'DefinitionID'))) {
// fatally error out if definition ID not set
- trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
- $false = new HTMLPurifier_Error();
- return $false;
+ throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
return $this->definitions[$type];
}
* Namespace.Directive => Value
* @param $config_array Configuration associative array
*/
- function loadArray($config_array) {
+ public function loadArray($config_array) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
* that are allowed in a web-form context as per an allowed
* namespaces/directives list.
* @param $allowed List of allowed namespaces/directives
- * @static
*/
- function getAllowedDirectivesForForm($allowed) {
- $schema = HTMLPurifier_ConfigSchema::instance();
+ public static function getAllowedDirectivesForForm($allowed, $schema = null) {
+ if (!$schema) {
+ $schema = HTMLPurifier_ConfigSchema::instance();
+ }
if ($allowed !== true) {
if (is_string($allowed)) $allowed = array($allowed);
$allowed_ns = array();
* @param $index Index/name that the config variables are in
* @param $allowed List of allowed namespaces/directives
* @param $mq_fix Boolean whether or not to enable magic quotes fix
- * @static
+ * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy
*/
- function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
- $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
- $config = HTMLPurifier_Config::create($ret);
+ public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
+ $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
+ $config = HTMLPurifier_Config::create($ret, $schema);
return $config;
}
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
* @note Same parameters as loadArrayFromForm
*/
- function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
- $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
+ public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
+ $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
$this->loadArray($ret);
}
/**
* Prepares an array from a form into something usable for the more
* strict parts of HTMLPurifier_Config
- * @static
*/
- function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
- $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
- $mq = get_magic_quotes_gpc() && $mq_fix;
+ public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
+ if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
+ $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
- $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
+ $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
$ret = array();
foreach ($allowed as $key) {
list($ns, $directive) = $key;
* Loads configuration values from an ini file
* @param $filename Name of ini file
*/
- function loadIni($filename) {
+ public function loadIni($filename) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
$array = parse_ini_file($filename, true);
$this->loadArray($array);
* Checks whether or not the configuration object is finalized.
* @param $error String error message, or false for no error
*/
- function isFinalized($error = false) {
+ public function isFinalized($error = false) {
if ($this->finalized && $error) {
trigger_error($error, E_USER_ERROR);
}
* Finalizes configuration only if auto finalize is on and not
* already finalized
*/
- function autoFinalize() {
+ public function autoFinalize() {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
}
/**
* Finalizes a configuration object, prohibiting further change
*/
- function finalize() {
+ public function finalize() {
$this->finalized = true;
}
}
+
/**
* Base class for configuration entity
*/
-class HTMLPurifier_ConfigDef {
- var $class = false;
+abstract class HTMLPurifier_ConfigDef {
+ public $class = false;
}
<?php
-require_once 'HTMLPurifier/ConfigDef.php';
-
/**
* Structure object containing definition of a directive.
* @note This structure does not contain default values
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
{
- var $class = 'directive';
+ public $class = 'directive';
- function HTMLPurifier_ConfigDef_Directive(
+ public function __construct(
$type = null,
- $descriptions = null,
$allow_null = null,
$allowed = null,
$aliases = null
) {
- if ( $type !== null) $this->type = $type;
- if ($descriptions !== null) $this->descriptions = $descriptions;
- if ( $allow_null !== null) $this->allow_null = $allow_null;
- if ( $allowed !== null) $this->allowed = $allowed;
- if ( $aliases !== null) $this->aliases = $aliases;
+ if ( $type !== null) $this->type = $type;
+ if ( $allow_null !== null) $this->allow_null = $allow_null;
+ if ( $allowed !== null) $this->allowed = $allowed;
+ if ( $aliases !== null) $this->aliases = $aliases;
}
/**
* - hash (array of key => value)
* - mixed (anything goes)
*/
- var $type = 'mixed';
-
- /**
- * Plaintext descriptions of the configuration entity is. Organized by
- * file and line number, so multiple descriptions are allowed.
- */
- var $descriptions = array();
+ public $type = 'mixed';
/**
* Is null allowed? Has no effect for mixed type.
* @bool
*/
- var $allow_null = false;
+ public $allow_null = false;
/**
* Lookup table of allowed values of the element, bool true if all allowed.
*/
- var $allowed = true;
+ public $allowed = true;
/**
* Hash of value aliases, i.e. values that are equivalent.
*/
- var $aliases = array();
-
- /**
- * Advisory list of directive aliases, i.e. other directives that
- * redirect here
- */
- var $directiveAliases = array();
-
- /**
- * Adds a description to the array
- */
- function addDescription($file, $line, $description) {
- if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
- $this->descriptions[$file][$line] = $description;
- }
+ public $aliases = array();
}
<?php
-require_once 'HTMLPurifier/ConfigDef.php';
-
/**
* Structure object describing a directive alias
*/
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
{
- var $class = 'alias';
+ public $class = 'alias';
/**
* Namespace being aliased to
*/
- var $namespace;
+ public $namespace;
/**
* Directive being aliased to
*/
- var $name;
+ public $name;
- function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
+ public function __construct($namespace, $name) {
$this->namespace = $namespace;
$this->name = $name;
}
<?php
-require_once 'HTMLPurifier/ConfigDef.php';
-
/**
* Structure object describing of a namespace
*/
-class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
-
- function HTMLPurifier_ConfigDef_Namespace($description = null) {
- $this->description = $description;
- }
-
- var $class = 'namespace';
-
- /**
- * String description of what kinds of directives go in this namespace.
- */
- var $description;
-
+class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef
+{
+ public $class = 'namespace';
}
<?php
-require_once 'HTMLPurifier/Error.php';
-require_once 'HTMLPurifier/ConfigDef.php';
-require_once 'HTMLPurifier/ConfigDef/Namespace.php';
-require_once 'HTMLPurifier/ConfigDef/Directive.php';
-require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
-
-if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
-
/**
* Configuration definition, defines directives and their defaults.
- * @note If you update this, please update Printer_ConfigForm
- * @todo The ability to define things multiple times is confusing and should
- * be factored out to its own function named registerDependency() or
- * addNote(), where only the namespace.name and an extra descriptions
- * documenting the nature of the dependency are needed. Since it's
- * possible that the dependency is registered before the configuration
- * is defined, deferring it to some sort of cache until it actually
- * gets defined would be wise, keeping it opaque until it does get
- * defined. We could add a finalize() method which would cause it to
- * error out if we get a dangling dependency. It's difficult, however,
- * to know whether or not it's a dependency, or a codependency, that is
- * neither of them fully depends on it. Where does the configuration go
- * then? This could be partially resolved by allowing blanket definitions
- * and then splitting them up into finer-grained versions, however, there
- * might be implementation difficulties in ini files regarding order of
- * execution.
*/
class HTMLPurifier_ConfigSchema {
* Defaults of the directives and namespaces.
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
*/
- var $defaults = array();
+ public $defaults = array();
/**
* Definition of the directives.
*/
- var $info = array();
+ public $info = array();
/**
- * Definition of namespaces.
+ * Application-wide singleton
*/
- var $info_namespace = array();
+ static protected $singleton;
/**
- * Lookup table of allowed types.
+ * Variable parser.
*/
- var $types = array(
- 'string' => 'String',
- 'istring' => 'Case-insensitive string',
- 'text' => 'Text',
- 'itext' => 'Case-insensitive text',
- 'int' => 'Integer',
- 'float' => 'Float',
- 'bool' => 'Boolean',
- 'lookup' => 'Lookup array',
- 'list' => 'Array list',
- 'hash' => 'Associative array',
- 'mixed' => 'Mixed'
- );
+ protected $parser;
+
+ public function __construct() {
+ $this->parser = new HTMLPurifier_VarParser_Flexible();
+ }
/**
- * Initializes the default namespaces.
+ * Unserializes the default ConfigSchema.
*/
- function initialize() {
- $this->defineNamespace('Core', 'Core features that are always available.');
- $this->defineNamespace('Attr', 'Features regarding attribute validation.');
- $this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
- $this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
- $this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
- $this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
- $this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
- $this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
- $this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
- $this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
+    public static function makeFromSerial() {
+        // Location of the pre-serialized schema, relative to the library prefix.
+        $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser';
+        $contents = file_get_contents($file);
+        $schema = ($contents === false) ? false : unserialize($contents);
+        if (!$schema) {
+            // Without this check a missing or corrupt file would make this
+            // method return false, which instance() would then store as the
+            // singleton.
+            trigger_error("Unserialization of configuration schema failed, could not load '$file'", E_USER_ERROR);
+        }
+        return $schema;
}
/**
* Retrieves an instance of the application-wide configuration definition.
- * @static
*/
- function &instance($prototype = null) {
- static $instance;
+ public static function instance($prototype = null) {
if ($prototype !== null) {
- $instance = $prototype;
- } elseif ($instance === null || $prototype === true) {
- $instance = new HTMLPurifier_ConfigSchema();
- $instance->initialize();
+ HTMLPurifier_ConfigSchema::$singleton = $prototype;
+ } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
+ HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
}
- return $instance;
+ return HTMLPurifier_ConfigSchema::$singleton;
}
/**
* Defines a directive for configuration
- * @static
- * @warning Will fail of directive's namespace is defined
+ * @warning Will fail of directive's namespace is defined.
+ * @warning This method's signature is slightly different from the legacy
+ * define() static method! Beware!
* @param $namespace Namespace the directive is in
* @param $name Key of directive
* @param $default Default value of directive
* @param $type Allowed type of the directive. See
* HTMLPurifier_DirectiveDef::$type for allowed values
- * @param $description Description of directive for documentation
+ * @param $allow_null Whether or not to allow null values
*/
- function define($namespace, $name, $default, $type, $description) {
- $def =& HTMLPurifier_ConfigSchema::instance();
-
- // basic sanity checks
- if (HTMLPURIFIER_SCHEMA_STRICT) {
- if (!isset($def->info[$namespace])) {
- trigger_error('Cannot define directive for undefined namespace',
- E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($name)) {
- trigger_error('Directive name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (empty($description)) {
- trigger_error('Description must be non-empty',
- E_USER_ERROR);
- return;
- }
- }
-
- if (isset($def->info[$namespace][$name])) {
- // already defined
- if (
- $def->info[$namespace][$name]->type !== $type ||
- $def->defaults[$namespace][$name] !== $default
- ) {
- trigger_error('Inconsistent default or type, cannot redefine');
- return;
- }
- } else {
- // needs defining
-
- // process modifiers (OPTIMIZE!)
- $type_values = explode('/', $type, 2);
- $type = $type_values[0];
- $modifier = isset($type_values[1]) ? $type_values[1] : false;
- $allow_null = ($modifier === 'null');
-
- if (HTMLPURIFIER_SCHEMA_STRICT) {
- if (!isset($def->types[$type])) {
- trigger_error('Invalid type for configuration directive',
- E_USER_ERROR);
- return;
- }
- $default = $def->validate($default, $type, $allow_null);
- if ($def->isError($default)) {
- trigger_error('Default value does not match directive type',
- E_USER_ERROR);
- return;
- }
- }
-
- $def->info[$namespace][$name] =
- new HTMLPurifier_ConfigDef_Directive();
- $def->info[$namespace][$name]->type = $type;
- $def->info[$namespace][$name]->allow_null = $allow_null;
- $def->defaults[$namespace][$name] = $default;
- }
- if (!HTMLPURIFIER_SCHEMA_STRICT) return;
- $backtrace = debug_backtrace();
- $file = $def->mungeFilename($backtrace[0]['file']);
- $line = $backtrace[0]['line'];
- $def->info[$namespace][$name]->addDescription($file,$line,$description);
+    public function add($namespace, $name, $default, $type, $allow_null) {
+        // Run the default through the variable parser before storing it.
+        $parsed = $this->parser->parse($default, $type, $allow_null);
+        // Build the directive definition, then register it and its default.
+        $directive = new HTMLPurifier_ConfigDef_Directive();
+        $directive->type = $type;
+        $directive->allow_null = $allow_null;
+        $this->info[$namespace][$name] = $directive;
+        $this->defaults[$namespace][$name] = $parsed;
}
/**
* Defines a namespace for directives to be put into.
- * @static
+ * @warning This is slightly different from the corresponding static
+ * method.
* @param $namespace Namespace's name
- * @param $description Description of the namespace
*/
- function defineNamespace($namespace, $description) {
- $def =& HTMLPurifier_ConfigSchema::instance();
- if (HTMLPURIFIER_SCHEMA_STRICT) {
- if (isset($def->info[$namespace])) {
- trigger_error('Cannot redefine namespace', E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($namespace)) {
- trigger_error('Namespace name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (empty($description)) {
- trigger_error('Description must be non-empty',
- E_USER_ERROR);
- return;
- }
- }
- $def->info[$namespace] = array();
- $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
- $def->info_namespace[$namespace]->description = $description;
- $def->defaults[$namespace] = array();
+ public function addNamespace($namespace) {
+ $this->info[$namespace] = array();
+ $this->defaults[$namespace] = array();
}
/**
*
* Directive value aliases are convenient for developers because it lets
* them set a directive to several values and get the same result.
- * @static
* @param $namespace Directive's namespace
* @param $name Name of Directive
- * @param $alias Name of aliased value
- * @param $real Value aliased value will be converted into
+ * @param $aliases Hash of aliased values to the real alias
*/
- function defineValueAliases($namespace, $name, $aliases) {
- $def =& HTMLPurifier_ConfigSchema::instance();
- if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
- trigger_error('Cannot set value alias for non-existant directive',
- E_USER_ERROR);
- return;
- }
+ public function addValueAliases($namespace, $name, $aliases) {
foreach ($aliases as $alias => $real) {
- if (HTMLPURIFIER_SCHEMA_STRICT) {
- if (!$def->info[$namespace][$name] !== true &&
- !isset($def->info[$namespace][$name]->allowed[$real])
- ) {
- trigger_error('Cannot define alias to value that is not allowed',
- E_USER_ERROR);
- return;
- }
- if (isset($def->info[$namespace][$name]->allowed[$alias])) {
- trigger_error('Cannot define alias over allowed value',
- E_USER_ERROR);
- return;
- }
- }
- $def->info[$namespace][$name]->aliases[$alias] = $real;
+ $this->info[$namespace][$name]->aliases[$alias] = $real;
}
}
/**
* Defines a set of allowed values for a directive.
- * @static
+ * @warning This is slightly different from the corresponding static
+ * method definition.
* @param $namespace Namespace of directive
* @param $name Name of directive
- * @param $allowed_values Arraylist of allowed values
+ * @param $allowed Lookup array of allowed values
*/
- function defineAllowedValues($namespace, $name, $allowed_values) {
- $def =& HTMLPurifier_ConfigSchema::instance();
- if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
- trigger_error('Cannot define allowed values for undefined directive',
- E_USER_ERROR);
- return;
- }
- $directive =& $def->info[$namespace][$name];
- $type = $directive->type;
- if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
- trigger_error('Cannot define allowed values for directive whose type is not string',
- E_USER_ERROR);
- return;
- }
- if ($directive->allowed === true) {
- $directive->allowed = array();
- }
- foreach ($allowed_values as $value) {
- $directive->allowed[$value] = true;
- }
- if (
- HTMLPURIFIER_SCHEMA_STRICT &&
- $def->defaults[$namespace][$name] !== null &&
- !isset($directive->allowed[$def->defaults[$namespace][$name]])
- ) {
- trigger_error('Default value must be in allowed range of variables',
- E_USER_ERROR);
- $directive->allowed = true; // undo undo!
- return;
- }
+    public function addAllowedValues($namespace, $name, $allowed) {
+        // Replaces the directive's allowed-values lookup wholesale with the
+        // supplied one; nothing is merged with a previous value.
+        $this->info[$namespace][$name]->allowed = $allowed;
}
/**
* Defines a directive alias for backwards compatibility
- * @static
* @param $namespace
* @param $name Directive that will be aliased
* @param $new_namespace
* @param $new_name Directive that the alias will be to
*/
- function defineAlias($namespace, $name, $new_namespace, $new_name) {
- $def =& HTMLPurifier_ConfigSchema::instance();
- if (HTMLPURIFIER_SCHEMA_STRICT) {
- if (!isset($def->info[$namespace])) {
- trigger_error('Cannot define directive alias in undefined namespace',
- E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($name)) {
- trigger_error('Directive name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (isset($def->info[$namespace][$name])) {
- trigger_error('Cannot define alias over directive',
- E_USER_ERROR);
- return;
- }
- if (!isset($def->info[$new_namespace][$new_name])) {
- trigger_error('Cannot define alias to undefined directive',
- E_USER_ERROR);
- return;
- }
- if ($def->info[$new_namespace][$new_name]->class == 'alias') {
- trigger_error('Cannot define alias to alias',
- E_USER_ERROR);
- return;
- }
- }
- $def->info[$namespace][$name] =
- new HTMLPurifier_ConfigDef_DirectiveAlias(
- $new_namespace, $new_name);
- $def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name";
+ public function addAlias($namespace, $name, $new_namespace, $new_name) {
+ $this->info[$namespace][$name] = new HTMLPurifier_ConfigDef_DirectiveAlias($new_namespace, $new_name);
}
- /**
- * Validate a variable according to type. Return null if invalid.
- */
- function validate($var, $type, $allow_null = false) {
- if (!isset($this->types[$type])) {
- trigger_error('Invalid type', E_USER_ERROR);
- return;
- }
- if ($allow_null && $var === null) return null;
- switch ($type) {
- case 'mixed':
- //if (is_string($var)) $var = unserialize($var);
- return $var;
- case 'istring':
- case 'string':
- case 'text': // no difference, just is longer/multiple line string
- case 'itext':
- if (!is_string($var)) break;
- if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
- return $var;
- case 'int':
- if (is_string($var) && ctype_digit($var)) $var = (int) $var;
- elseif (!is_int($var)) break;
- return $var;
- case 'float':
- if (is_string($var) && is_numeric($var)) $var = (float) $var;
- elseif (!is_float($var)) break;
- return $var;
- case 'bool':
- if (is_int($var) && ($var === 0 || $var === 1)) {
- $var = (bool) $var;
- } elseif (is_string($var)) {
- if ($var == 'on' || $var == 'true' || $var == '1') {
- $var = true;
- } elseif ($var == 'off' || $var == 'false' || $var == '0') {
- $var = false;
- } else {
- break;
- }
- } elseif (!is_bool($var)) break;
- return $var;
- case 'list':
- case 'hash':
- case 'lookup':
- if (is_string($var)) {
- // special case: technically, this is an array with
- // a single empty string item, but having an empty
- // array is more intuitive
- if ($var == '') return array();
- if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
- // simplistic string to array method that only works
- // for simple lists of tag names or alphanumeric characters
- $var = explode(',',$var);
- } else {
- $var = preg_split('/(,|[\n\r]+)/', $var);
- }
- // remove spaces
- foreach ($var as $i => $j) $var[$i] = trim($j);
- if ($type === 'hash') {
- // key:value,key2:value2
- $nvar = array();
- foreach ($var as $keypair) {
- $c = explode(':', $keypair, 2);
- if (!isset($c[1])) continue;
- $nvar[$c[0]] = $c[1];
- }
- $var = $nvar;
- }
- }
- if (!is_array($var)) break;
- $keys = array_keys($var);
- if ($keys === array_keys($keys)) {
- if ($type == 'list') return $var;
- elseif ($type == 'lookup') {
- $new = array();
- foreach ($var as $key) {
- $new[$key] = true;
- }
- return $new;
- } else break;
- }
- if ($type === 'lookup') {
- foreach ($var as $key => $value) {
- $var[$key] = true;
- }
- }
- return $var;
+ // DEPRECATED METHODS
+
+    /**
+     * Deprecated static form of add(). Splits a "type/modifier" string,
+     * treats the modifier "null" as allow-null, and forwards to add() on
+     * the application-wide instance. $description is not used.
+     * @see HTMLPurifier_ConfigSchema->add()
+     */
+    public static function define($namespace, $name, $default, $type, $description) {
+        self::deprecated(__METHOD__);
+        // "string/null" => type "string" with null allowed
+        $parts = explode('/', $type, 2);
+        $allow_null = isset($parts[1]) && $parts[1] === 'null';
+        self::instance()->add($namespace, $name, $default, $parts[0], $allow_null);
+    }
+
+    /**
+     * Deprecated static form of addNamespace(); $description is not used.
+     * @see HTMLPurifier_ConfigSchema->addNamespace()
+     */
+    public static function defineNamespace($namespace, $description) {
+        self::deprecated(__METHOD__);
+        self::instance()->addNamespace($namespace);
+    }
+
+    /**
+     * Deprecated static form of addValueAliases().
+     * @see HTMLPurifier_ConfigSchema->addValueAliases()
+     */
+    public static function defineValueAliases($namespace, $name, $aliases) {
+        self::deprecated(__METHOD__);
+        self::instance()->addValueAliases($namespace, $name, $aliases);
+    }
+
+ /** @see HTMLPurifier_ConfigSchema->addAllowedValues() */
+ public static function defineAllowedValues($namespace, $name, $allowed_values) {
+ HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+ $allowed = array();
+ foreach ($allowed_values as $value) {
+ $allowed[$value] = true;
}
- $error = new HTMLPurifier_Error();
- return $error;
+ $def = HTMLPurifier_ConfigSchema::instance();
+ $def->addAllowedValues($namespace, $name, $allowed);
}
- /**
- * Takes an absolute path and munges it into a more manageable relative path
- */
- function mungeFilename($filename) {
- if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
- $offset = strrpos($filename, 'HTMLPurifier');
- $filename = substr($filename, $offset);
- $filename = str_replace('\\', '/', $filename);
- return $filename;
+    /**
+     * Deprecated static form of addAlias().
+     * @see HTMLPurifier_ConfigSchema->addAlias()
+     */
+    public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
+        self::deprecated(__METHOD__);
+        self::instance()->addAlias($namespace, $name, $new_namespace, $new_name);
+    }
+
+    /**
+     * Legacy pass-through to the schema's variable parser. Emits an
+     * E_USER_NOTICE and then returns the parser's result unchanged.
+     * @deprecated Use HTMLPurifier_VarParser->parse() instead.
+     */
+    public function validate($a, $b, $c = false) {
+        $notice = "HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead";
+        trigger_error($notice, E_USER_NOTICE);
+        $parsed = $this->parser->parse($a, $b, $c);
+        return $parsed;
}
/**
- * Checks if var is an HTMLPurifier_Error object
+ * Throws an E_USER_NOTICE stating that a method is deprecated.
*/
- function isError($var) {
- if (!is_object($var)) return false;
- if (!is_a($var, 'HTMLPurifier_Error')) return false;
- return true;
+    private static function deprecated($method) {
+        // The define*() wrappers pass __METHOD__, which already has the form
+        // "Class::method". Only prepend the class name for a bare method
+        // name, so the notice does not print the class twice.
+        $qualified = (strpos($method, '::') === false)
+            ? "HTMLPurifier_ConfigSchema::$method"
+            : $method;
+        trigger_error("Static $qualified deprecated, use add*() method instead", E_USER_NOTICE);
}
+
}
--- /dev/null
+<?php
+
+/**
+ * Converts HTMLPurifier_ConfigSchema_Interchange to our runtime
+ * representation used to perform checks on user configuration.
+ */
+class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
+{
+
+    /**
+     * Creates a fresh HTMLPurifier_ConfigSchema and fills it from the
+     * interchange: every namespace first, then each directive together
+     * with its allowed values, directive aliases and value aliases.
+     * @param $interchange Object exposing ->namespaces and ->directives
+     * @return The populated HTMLPurifier_ConfigSchema
+     */
+    public function build($interchange) {
+        $schema = new HTMLPurifier_ConfigSchema();
+        foreach ($interchange->namespaces as $ns) {
+            $schema->addNamespace($ns->namespace);
+        }
+        foreach ($interchange->directives as $directive) {
+            // Namespace/name pair shared by every call for this directive
+            $namespace = $directive->id->namespace;
+            $name      = $directive->id->directive;
+            $schema->add($namespace, $name, $directive->default, $directive->type, $directive->typeAllowsNull);
+            if ($directive->allowed !== null) {
+                $schema->addAllowedValues($namespace, $name, $directive->allowed);
+            }
+            foreach ($directive->aliases as $alias) {
+                // The alias id is the old location; it points at this directive
+                $schema->addAlias($alias->namespace, $alias->directive, $namespace, $name);
+            }
+            if ($directive->valueAliases !== null) {
+                $schema->addValueAliases($namespace, $name, $directive->valueAliases);
+            }
+        }
+        return $schema;
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Converts HTMLPurifier_ConfigSchema_Interchange to an XML format,
+ * which can be further processed to generate documentation.
+ */
+class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
+{
+
+    /**
+     * Interchange passed to build(); kept so buildNamespace() can look up
+     * the directives belonging to a namespace.
+     */
+    protected $interchange;
+
+    /**
+     * Writes a <div> in the XHTML namespace whose content is $html after it
+     * has been run through HTMLPurifier. The purified markup is written raw.
+     * @param $html HTML string to purify and embed
+     */
+    protected function writeHTMLDiv($html) {
+        $this->startElement('div');
+
+        $purifier = HTMLPurifier::getInstance();
+        $html = $purifier->purify($html);
+        $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
+        $this->writeRaw($html);
+
+        $this->endElement(); // div
+    }
+
+    /**
+     * Returns PHP source for $var via var_export(), except that an empty
+     * array is rendered as the literal string 'array()'.
+     */
+    protected function export($var) {
+        if ($var === array()) return 'array()';
+        return var_export($var, true);
+    }
+
+    /**
+     * Writes the whole <configdoc> document for $interchange and flushes it.
+     * @param $interchange Object exposing ->name, ->namespaces, ->directives
+     */
+    public function build($interchange) {
+        // global access, only use as last resort
+        $this->interchange = $interchange;
+
+        $this->setIndent(true);
+        $this->startDocument('1.0', 'UTF-8');
+        $this->startElement('configdoc');
+        $this->writeElement('title', $interchange->name);
+
+        foreach ($interchange->namespaces as $namespace) {
+            $this->buildNamespace($namespace);
+        }
+
+        $this->endElement(); // configdoc
+        $this->flush();
+    }
+
+    /**
+     * Writes one <namespace> element: name, description, and every directive
+     * of the stored interchange whose id belongs to this namespace.
+     */
+    public function buildNamespace($namespace) {
+        $this->startElement('namespace');
+        $this->writeAttribute('id', $namespace->namespace);
+
+        $this->writeElement('name', $namespace->namespace);
+        $this->startElement('description');
+            $this->writeHTMLDiv($namespace->description);
+        $this->endElement(); // description
+
+        foreach ($this->interchange->directives as $directive) {
+            if ($directive->id->namespace !== $namespace->namespace) continue;
+            $this->buildDirective($directive);
+        }
+
+        $this->endElement(); // namespace
+    }
+
+    /**
+     * Writes one <directive> element: name, aliases, constraints (version,
+     * type, allowed values, default, external projects), optional
+     * deprecation info, and description.
+     */
+    public function buildDirective($directive) {
+        $this->startElement('directive');
+        $this->writeAttribute('id', $directive->id->toString());
+
+        $this->writeElement('name', $directive->id->directive);
+
+        $this->startElement('aliases');
+            foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
+        $this->endElement(); // aliases
+
+        $this->startElement('constraints');
+            if ($directive->version) $this->writeElement('version', $directive->version);
+            $this->startElement('type');
+                if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
+                $this->text($directive->type);
+            $this->endElement(); // type
+            if ($directive->allowed) {
+                $this->startElement('allowed');
+                    foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value);
+                $this->endElement(); // allowed
+            }
+            // The attribute must be written while <default> is still open.
+            // writeElement() emits a complete element, so an attribute
+            // written after it can no longer attach to <default>.
+            $this->startElement('default');
+                $this->writeAttribute('xml:space', 'preserve');
+                $this->text($this->export($directive->default));
+            $this->endElement(); // default
+            if ($directive->external) {
+                $this->startElement('external');
+                    foreach ($directive->external as $project) $this->writeElement('project', $project);
+                $this->endElement(); // external
+            }
+        $this->endElement(); // constraints
+
+        if ($directive->deprecatedVersion) {
+            $this->startElement('deprecated');
+                $this->writeElement('version', $directive->deprecatedVersion);
+                $this->writeElement('use', $directive->deprecatedUse->toString());
+            $this->endElement(); // deprecated
+        }
+
+        $this->startElement('description');
+            $this->writeHTMLDiv($directive->description);
+        $this->endElement(); // description
+
+        $this->endElement(); // directive
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Exceptions related to configuration schema
+ */
+class HTMLPurifier_ConfigSchema_Exception extends HTMLPurifier_Exception
+{
+ // Declares no members of its own; it only provides a distinct exception
+ // type derived from HTMLPurifier_Exception.
+
+}
--- /dev/null
+<?php
+
+/**
+ * Generic schema interchange format that can be converted to a runtime
+ * representation (HTMLPurifier_ConfigSchema) or HTML documentation. Members
+ * are completely validated.
+ */
+class HTMLPurifier_ConfigSchema_Interchange
+{
+
+    /**
+     * Name of the application this schema is describing.
+     */
+    public $name;
+
+    /**
+     * Namespace entries keyed by namespace name; filled by addNamespace().
+     */
+    public $namespaces = array();
+
+    /**
+     * Directive entries keyed by the string form of their id; filled by
+     * addDirective().
+     */
+    public $directives = array();
+
+    /**
+     * Registers a namespace under its ->namespace name.
+     * Throws HTMLPurifier_ConfigSchema_Exception if that name is taken.
+     */
+    public function addNamespace($namespace) {
+        $key = $namespace->namespace;
+        if (isset($this->namespaces[$key])) {
+            throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$key'");
+        }
+        $this->namespaces[$key] = $namespace;
+    }
+
+    /**
+     * Registers a directive under $directive->id->toString().
+     * Throws HTMLPurifier_ConfigSchema_Exception if that id is taken.
+     */
+    public function addDirective($directive) {
+        $key = $directive->id->toString();
+        if (isset($this->directives[$key])) {
+            throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$key'");
+        }
+        $this->directives[$key] = $directive;
+    }
+
+    /**
+     * Convenience wrapper: runs a new HTMLPurifier_ConfigSchema_Validator
+     * over this object and returns whatever its validate() returns.
+     */
+    public function validate() {
+        $checker = new HTMLPurifier_ConfigSchema_Validator();
+        $result = $checker->validate($this);
+        return $result;
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Interchange component class describing configuration directives.
+ */
+class HTMLPurifier_ConfigSchema_Interchange_Directive
+{
+
+ /**
+ * ID of directive, instance of HTMLPurifier_ConfigSchema_Interchange_Id.
+ */
+ public $id;
+
+ /**
+ * String type, e.g. 'integer' or 'istring'.
+ */
+ public $type;
+
+ /**
+ * Default value, e.g. 3 or 'DefaultVal'.
+ */
+ public $default;
+
+ /**
+ * HTML description.
+ */
+ public $description;
+
+ /**
+ * Boolean whether or not null is allowed as a value.
+ */
+ public $typeAllowsNull = false;
+
+ /**
+ * Lookup table of allowed scalar values, e.g. array('allowed' => true).
+ * Null if all values are allowed.
+ */
+ public $allowed;
+
+ /**
+ * List of aliases for the directive,
+ * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir')).
+ */
+ public $aliases = array();
+
+ /**
+ * Hash of value aliases, e.g. array('alt' => 'real'). Null if value
+ * aliasing is disabled (necessary for non-scalar types).
+ */
+ public $valueAliases;
+
+ /**
+ * Version of HTML Purifier in which the directive was introduced,
+ * e.g. '1.3.1'. Null if the directive has always existed.
+ */
+ public $version;
+
+ /**
+ * ID of the directive that supersedes this old directive, an instance
+ * of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated.
+ */
+ public $deprecatedUse;
+
+ /**
+ * Version of HTML Purifier in which this directive was deprecated.
+ * Null if not deprecated.
+ */
+ public $deprecatedVersion;
+
+ /**
+ * List of external projects this directive depends on, e.g. array('CSSTidy').
+ */
+ public $external = array();
+
+}
--- /dev/null
+<?php
+
+/**
+ * Represents a directive ID in the interchange format.
+ *
+ * An ID is a namespace name and a directive name; its string form joins
+ * the two with a single dot, e.g. 'Core.Encoding'.
+ */
+class HTMLPurifier_ConfigSchema_Interchange_Id
+{
+
+    /**
+     * Namespace part and directive part of the ID, as passed to the
+     * constructor.
+     */
+    public $namespace, $directive;
+
+    /**
+     * @param $namespace Namespace part of the ID (left of the dot).
+     * @param $directive Directive part of the ID (right of the dot).
+     */
+    public function __construct($namespace, $directive) {
+        $this->namespace = $namespace;
+        $this->directive = $directive;
+    }
+
+    /**
+     * Returns the dotted string form, namespace . '.' . directive.
+     * @warning This is NOT magic, to ensure that people don't abuse SPL and
+     *          cause problems for PHP 5.0 support.
+     */
+    public function toString() {
+        return $this->namespace . '.' . $this->directive;
+    }
+
+    /**
+     * Builds an Id from its dotted string form.
+     *
+     * The string is split on the FIRST dot only, so nothing is silently
+     * discarded: 'A.B.C' yields namespace 'A' and directive 'B.C' (which
+     * round-trips through toString() and can then be rejected by a
+     * validator) instead of being quietly truncated to 'A.B'. When the
+     * string contains no dot at all, the directive part is null and no
+     * undefined-offset notice is raised.
+     *
+     * @param $id String ID such as 'Namespace.Directive'.
+     * @return HTMLPurifier_ConfigSchema_Interchange_Id
+     */
+    public static function make($id) {
+        $parts = explode('.', $id, 2);
+        $namespace = $parts[0];
+        $directive = isset($parts[1]) ? $parts[1] : null;
+        return new HTMLPurifier_ConfigSchema_Interchange_Id($namespace, $directive);
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Interchange component class describing namespaces.
+ *
+ * This is a plain data holder: both members are public and the class has
+ * no methods.
+ */
+class HTMLPurifier_ConfigSchema_Interchange_Namespace
+{
+
+ /**
+ * Name of namespace defined. HTMLPurifier_ConfigSchema_Interchange uses
+ * this value as the key when the namespace is registered.
+ */
+ public $namespace;
+
+ /**
+ * HTML description.
+ */
+ public $description;
+
+}
--- /dev/null
+<?php
+
+/**
+ * Turns string hashes (one per namespace or directive) into entries of an
+ * HTMLPurifier_ConfigSchema_Interchange object.
+ */
+class HTMLPurifier_ConfigSchema_InterchangeBuilder
+{
+
+    /**
+     * Used for processing DEFAULT, nothing else.
+     */
+    protected $varParser;
+
+    /**
+     * @param $varParser Parser used for DEFAULT values. When omitted (or
+     *        otherwise falsy) a new HTMLPurifier_VarParser_Native is used.
+     */
+    public function __construct($varParser = null) {
+        $this->varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native();
+    }
+
+    /**
+     * Builds an interchange object from every *.txt file in a directory.
+     *
+     * The schema name is read from 'info.ini' in that directory; the
+     * remaining files are processed in sorted filename order.
+     *
+     * @param $dir Directory to read. Defaults to the schema directory
+     *        below HTMLPURIFIER_PREFIX. A trailing directory separator is
+     *        optional.
+     * @return HTMLPurifier_ConfigSchema_Interchange
+     * @throws HTMLPurifier_ConfigSchema_Exception if the directory cannot
+     *         be opened, or if any file fails to build.
+     */
+    public static function buildFromDirectory($dir = null) {
+        $parser      = new HTMLPurifier_StringHashParser();
+        $builder     = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
+        $interchange = new HTMLPurifier_ConfigSchema_Interchange();
+
+        if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
+
+        // File names are appended directly to $dir below, so make sure it
+        // ends in a separator even when the caller left it off.
+        $last = substr($dir, -1);
+        if ($last !== '/' && $last !== '\\') $dir .= '/';
+
+        // parse_ini_file() returns false on failure; isset() on false is
+        // simply false, so a missing file leaves the name as null.
+        $info = parse_ini_file($dir . 'info.ini');
+        $interchange->name = isset($info['name']) ? $info['name'] : null;
+
+        $dh = opendir($dir);
+        if ($dh === false) {
+            // Without this guard readdir() would be handed a non-resource,
+            // and the loop below would never see a false return value.
+            throw new HTMLPurifier_ConfigSchema_Exception("Cannot open schema directory '$dir'");
+        }
+
+        $files = array();
+        while (false !== ($file = readdir($dh))) {
+            // Skip dotfiles (including '.' and '..') and anything that does
+            // not end in '.txt'.
+            if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') {
+                continue;
+            }
+            $files[] = $file;
+        }
+        closedir($dh);
+
+        // readdir() order is filesystem-dependent; sort for determinism.
+        sort($files);
+        foreach ($files as $file) {
+            $builder->build(
+                $interchange,
+                new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
+            );
+        }
+
+        return $interchange;
+    }
+
+    /**
+     * Builds an interchange object based on a hash.
+     *
+     * A hash whose ID contains no dot is treated as a namespace; any other
+     * hash is treated as a directive. Afterwards, keys that were never read
+     * are reported via _findUnused().
+     *
+     * @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
+     * @param $hash HTMLPurifier_StringHash source data (anything else is
+     *        wrapped in a new HTMLPurifier_StringHash)
+     * @throws HTMLPurifier_ConfigSchema_Exception if the hash has no ID.
+     */
+    public function build($interchange, $hash) {
+        if (!$hash instanceof HTMLPurifier_StringHash) {
+            $hash = new HTMLPurifier_StringHash($hash);
+        }
+        if (!isset($hash['ID'])) {
+            throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
+        }
+        if (strpos($hash['ID'], '.') === false) {
+            $this->buildNamespace($interchange, $hash);
+        } else {
+            $this->buildDirective($interchange, $hash);
+        }
+        $this->_findUnused($hash);
+    }
+
+    /**
+     * Creates a namespace entry from the hash (keys ID and, optionally,
+     * DESCRIPTION) and adds it to the interchange object.
+     */
+    public function buildNamespace($interchange, $hash) {
+        $namespace = new HTMLPurifier_ConfigSchema_Interchange_Namespace();
+        $namespace->namespace = $hash->offsetGet('ID');
+        if (isset($hash['DESCRIPTION'])) {
+            $namespace->description = $hash->offsetGet('DESCRIPTION');
+        }
+        $interchange->addNamespace($namespace);
+    }
+
+    /**
+     * Creates a directive entry from the hash and adds it to the
+     * interchange object.
+     *
+     * ID and TYPE are required. DEFAULT, DESCRIPTION, ALLOWED,
+     * VALUE-ALIASES, ALIASES, VERSION, DEPRECATED-USE, DEPRECATED-VERSION
+     * and EXTERNAL are read when present.
+     *
+     * @throws HTMLPurifier_ConfigSchema_Exception if TYPE is missing or
+     *         DEFAULT cannot be parsed as that type.
+     */
+    public function buildDirective($interchange, $hash) {
+        $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();
+
+        // These are required elements:
+        $directive->id = $this->id($hash->offsetGet('ID'));
+        $id = $directive->id->toString(); // convenience
+
+        if (isset($hash['TYPE'])) {
+            // 'type/anything' marks the type as nullable; only the part
+            // before the slash is the type name.
+            $type = explode('/', $hash->offsetGet('TYPE'));
+            if (isset($type[1])) $directive->typeAllowsNull = true;
+            $directive->type = $type[0];
+        } else {
+            throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined");
+        }
+
+        if (isset($hash['DEFAULT'])) {
+            try {
+                $directive->default = $this->varParser->parse($hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull);
+            } catch (HTMLPurifier_VarParserException $e) {
+                throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'");
+            }
+        }
+
+        if (isset($hash['DESCRIPTION'])) {
+            $directive->description = $hash->offsetGet('DESCRIPTION');
+        }
+
+        if (isset($hash['ALLOWED'])) {
+            $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED')));
+        }
+
+        if (isset($hash['VALUE-ALIASES'])) {
+            $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES'));
+        }
+
+        if (isset($hash['ALIASES'])) {
+            // PREG_SPLIT_NO_EMPTY: an empty value or a stray comma must not
+            // produce an alias built from the empty string.
+            $raw_aliases = trim($hash->offsetGet('ALIASES'));
+            $aliases = preg_split('/\s*,\s*/', $raw_aliases, -1, PREG_SPLIT_NO_EMPTY);
+            foreach ($aliases as $alias) {
+                $directive->aliases[] = $this->id($alias);
+            }
+        }
+
+        if (isset($hash['VERSION'])) {
+            $directive->version = $hash->offsetGet('VERSION');
+        }
+
+        if (isset($hash['DEPRECATED-USE'])) {
+            $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE'));
+        }
+
+        if (isset($hash['DEPRECATED-VERSION'])) {
+            $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION');
+        }
+
+        if (isset($hash['EXTERNAL'])) {
+            // Same empty-entry protection as for ALIASES.
+            $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL')), -1, PREG_SPLIT_NO_EMPTY);
+        }
+
+        $interchange->addDirective($directive);
+    }
+
+    /**
+     * Evaluates an array PHP code string without array() wrapper
+     *
+     * NOTE(review): this runs eval() on text taken from the schema hash.
+     * That is only acceptable while schema files are trusted, developer-
+     * authored input; never feed externally supplied data through here.
+     */
+    protected function evalArray($contents) {
+        return eval('return array('. $contents .');');
+    }
+
+    /**
+     * Converts an array list into a lookup array, i.e. each value of the
+     * input becomes a key mapped to true.
+     */
+    protected function lookup($array) {
+        $ret = array();
+        foreach ($array as $val) $ret[$val] = true;
+        return $ret;
+    }
+
+    /**
+     * Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id
+     * object based on a string Id.
+     */
+    protected function id($id) {
+        return HTMLPurifier_ConfigSchema_Interchange_Id::make($id);
+    }
+
+    /**
+     * Triggers errors for any unused keys passed in the hash; such keys
+     * may indicate typos, missing values, etc.
+     * @param $hash Instance of HTMLPurifier_StringHash to check.
+     */
+    protected function _findUnused($hash) {
+        $accessed = $hash->getAccessed();
+        foreach ($hash as $k => $v) {
+            if (!isset($accessed[$k])) {
+                trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE);
+            }
+        }
+    }
+
+}
+
--- /dev/null
+<?php
+
+/**
+ * Performs validations on HTMLPurifier_ConfigSchema_Interchange
+ *
+ * @note If you see '// handled by InterchangeBuilder', that means a
+ *       design decision in that class would prevent this validation from
+ *       ever being necessary. We have them anyway, however, for
+ *       redundancy.
+ */
+class HTMLPurifier_ConfigSchema_Validator
+{
+
+    /**
+     * Easy to access global objects: the interchange object currently
+     * being validated, and a map of alias id string => id string of the
+     * directive that declared the alias.
+     */
+    protected $interchange, $aliases;
+
+    /**
+     * Context-stack to provide easy to read error messages.
+     */
+    protected $context = array();
+
+    /**
+     * HTMLPurifier_VarParser to test default's type.
+     */
+    protected $parser;
+
+    public function __construct() {
+        $this->parser = new HTMLPurifier_VarParser();
+    }
+
+    /**
+     * Validates a fully-formed interchange object. Throws an
+     * HTMLPurifier_ConfigSchema_Exception if there's a problem.
+     * @param $interchange HTMLPurifier_ConfigSchema_Interchange to check.
+     * @return true when no problem was found.
+     */
+    public function validate($interchange) {
+        $this->interchange = $interchange;
+        $this->aliases = array();
+        // A previous run that ended in an exception never reached its
+        // array_pop() calls; start every run from an empty context so
+        // error messages do not carry stale entries.
+        $this->context = array();
+        // PHP is a bit lax with integer <=> string conversions in
+        // arrays, so we don't use the identical !== comparison
+        foreach ($interchange->namespaces as $i => $namespace) {
+            if ($i != $namespace->namespace) $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'");
+            $this->validateNamespace($namespace);
+        }
+        foreach ($interchange->directives as $i => $directive) {
+            $id = $directive->id->toString();
+            if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
+            $this->validateDirective($directive);
+        }
+        return true;
+    }
+
+    /**
+     * Validates a HTMLPurifier_ConfigSchema_Interchange_Namespace object.
+     */
+    public function validateNamespace($n) {
+        $this->context[] = "namespace '{$n->namespace}'";
+        $this->with($n, 'namespace')
+            ->assertNotEmpty()
+            ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
+        $this->with($n, 'description')
+            ->assertNotEmpty()
+            ->assertIsString(); // handled by InterchangeBuilder
+        array_pop($this->context);
+    }
+
+    /**
+     * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object.
+     */
+    public function validateId($id) {
+        // The type check has to come before any method call on $id;
+        // otherwise a non-object value dies with a fatal error instead of
+        // the exception this check is meant to raise.
+        if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) {
+            // handled by InterchangeBuilder
+            if (is_object($id) && method_exists($id, 'toString')) {
+                $this->context[] = "id '" . $id->toString() . "'";
+            } else {
+                $this->context[] = 'id';
+            }
+            $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
+        }
+        $id_string = $id->toString();
+        $this->context[] = "id '$id_string'";
+        if (!isset($this->interchange->namespaces[$id->namespace])) {
+            $this->error('namespace', 'does not exist'); // assumes that the namespace was validated already
+        }
+        $this->with($id, 'directive')
+            ->assertNotEmpty()
+            ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
+        array_pop($this->context);
+    }
+
+    /**
+     * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object.
+     */
+    public function validateDirective($d) {
+        $id = $d->id->toString();
+        $this->context[] = "directive '$id'";
+        $this->validateId($d->id);
+
+        $this->with($d, 'description')
+            ->assertNotEmpty();
+
+        // BEGIN - handled by InterchangeBuilder
+        $this->with($d, 'type')
+            ->assertNotEmpty();
+        $this->with($d, 'typeAllowsNull')
+            ->assertIsBool();
+        try {
+            // This also tests validity of $d->type
+            $this->parser->parse($d->default, $d->type, $d->typeAllowsNull);
+        } catch (HTMLPurifier_VarParserException $e) {
+            $this->error('default', 'had error: ' . $e->getMessage());
+        }
+        // END - handled by InterchangeBuilder
+
+        if (!is_null($d->allowed) || !empty($d->valueAliases)) {
+            // allowed and valueAliases require that we be dealing with
+            // strings, so check for that early.
+            if (!isset(HTMLPurifier_VarParser::$stringTypes[$d->type])) {
+                $this->error('type', 'must be a string type when used with allowed or value aliases');
+            }
+        }
+
+        $this->validateDirectiveAllowed($d);
+        $this->validateDirectiveValueAliases($d);
+        $this->validateDirectiveAliases($d);
+
+        array_pop($this->context);
+    }
+
+    /**
+     * Extra validation if $allowed member variable of
+     * HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
+     */
+    public function validateDirectiveAllowed($d) {
+        if (is_null($d->allowed)) return;
+        $this->with($d, 'allowed')
+            ->assertNotEmpty()
+            ->assertIsLookup(); // handled by InterchangeBuilder
+        if (is_string($d->default) && !isset($d->allowed[$d->default])) {
+            $this->error('default', 'must be an allowed value');
+        }
+        $this->context[] = 'allowed';
+        foreach ($d->allowed as $val => $x) {
+            if (!is_string($val)) $this->error("value $val", 'must be a string');
+        }
+        array_pop($this->context);
+    }
+
+    /**
+     * Extra validation if $valueAliases member variable of
+     * HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
+     */
+    public function validateDirectiveValueAliases($d) {
+        if (is_null($d->valueAliases)) return;
+        $this->with($d, 'valueAliases')
+            ->assertIsArray(); // handled by InterchangeBuilder
+        $this->context[] = 'valueAliases';
+        foreach ($d->valueAliases as $alias => $real) {
+            if (!is_string($alias)) $this->error("alias $alias", 'must be a string');
+            if (!is_string($real)) $this->error("alias target $real from alias '$alias'", 'must be a string');
+            if ($alias === $real) {
+                $this->error("alias '$alias'", "must not be an alias to itself");
+            }
+        }
+        if (!is_null($d->allowed)) {
+            // With an allowed list present, an alias may not shadow an
+            // allowed value and must point at one.
+            foreach ($d->valueAliases as $alias => $real) {
+                if (isset($d->allowed[$alias])) {
+                    $this->error("alias '$alias'", 'must not be an allowed value');
+                } elseif (!isset($d->allowed[$real])) {
+                    $this->error("alias '$alias'", 'must be an alias to an allowed value');
+                }
+            }
+        }
+        array_pop($this->context);
+    }
+
+    /**
+     * Extra validation if $aliases member variable of
+     * HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
+     */
+    public function validateDirectiveAliases($d) {
+        $this->with($d, 'aliases')
+            ->assertIsArray(); // handled by InterchangeBuilder
+        $this->context[] = 'aliases';
+        foreach ($d->aliases as $alias) {
+            $this->validateId($alias);
+            $s = $alias->toString();
+            if (isset($this->interchange->directives[$s])) {
+                $this->error("alias '$s'", 'collides with another directive');
+            }
+            if (isset($this->aliases[$s])) {
+                $other_directive = $this->aliases[$s];
+                $this->error("alias '$s'", "collides with alias for directive '$other_directive'");
+            }
+            // Remember which directive claimed this alias so that a later
+            // directive declaring the same alias is reported.
+            $this->aliases[$s] = $d->id->toString();
+        }
+        array_pop($this->context);
+    }
+
+    // protected helper functions
+
+    /**
+     * Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom
+     * for validating simple member variables of objects.
+     */
+    protected function with($obj, $member) {
+        return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member);
+    }
+
+    /**
+     * Emits an error, providing helpful context.
+     * @param $target Name of the offending item, or false to let the
+     *        context itself be the subject of the message.
+     * @param $msg Message appended after the subject.
+     * @throws HTMLPurifier_ConfigSchema_Exception always.
+     */
+    protected function error($target, $msg) {
+        if ($target !== false) $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext();
+        else $prefix = ucfirst($this->getFormattedContext());
+        throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg));
+    }
+
+    /**
+     * Returns a formatted context string: the context stack joined with
+     * ' in ', innermost entry first.
+     */
+    protected function getFormattedContext() {
+        return implode(' in ', array_reverse($this->context));
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Chainable checker for a single member variable of an object.
+ * Instances are meant to be treated as immutable; see
+ * HTMLPurifier_ConfigSchema_Validator for how they are used. The name
+ * 'atom' reflects that each check is independent of the others and usually
+ * concerns a scalar.
+ *
+ * Every assert*() method returns $this on success and throws an
+ * HTMLPurifier_ConfigSchema_Exception on failure.
+ */
+class HTMLPurifier_ConfigSchema_ValidatorAtom
+{
+
+    protected $context, $obj, $member, $contents;
+
+    /**
+     * @param $context Pre-formatted context string used in error messages.
+     * @param $obj Object that owns the member being checked.
+     * @param $member Name of the member variable on $obj.
+     */
+    public function __construct($context, $obj, $member) {
+        $this->member   = $member;
+        $this->obj      = $obj;
+        $this->context  = $context;
+        // Bound by reference to the member itself, not copied.
+        $this->contents =& $obj->$member;
+    }
+
+    /**
+     * Passes when the member is a string.
+     */
+    public function assertIsString() {
+        if (is_string($this->contents)) {
+            return $this;
+        }
+        $this->error('must be a string');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is a boolean.
+     */
+    public function assertIsBool() {
+        if (is_bool($this->contents)) {
+            return $this;
+        }
+        $this->error('must be a boolean');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is an array.
+     */
+    public function assertIsArray() {
+        if (is_array($this->contents)) {
+            return $this;
+        }
+        $this->error('must be an array');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is anything other than null.
+     */
+    public function assertNotNull() {
+        if ($this->contents !== null) {
+            return $this;
+        }
+        $this->error('must not be null');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is a string made up solely of alphanumeric
+     * characters (as judged by ctype_alnum()).
+     */
+    public function assertAlnum() {
+        $this->assertIsString();
+        if (ctype_alnum($this->contents)) {
+            return $this;
+        }
+        $this->error('must be alphanumeric');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is not empty in the sense of PHP's empty().
+     */
+    public function assertNotEmpty() {
+        if (!empty($this->contents)) {
+            return $this;
+        }
+        $this->error('must not be empty');
+        return $this;
+    }
+
+    /**
+     * Passes when the member is an array whose every value is exactly true.
+     */
+    public function assertIsLookup() {
+        $this->assertIsArray();
+        foreach ($this->contents as $flag) {
+            if ($flag === true) {
+                continue;
+            }
+            $this->error('must be a lookup array');
+        }
+        return $this;
+    }
+
+    /**
+     * Throws the validation exception, naming the member and its context.
+     */
+    protected function error($msg) {
+        throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . $msg);
+    }
+
+}
+
+
--- /dev/null
+O:25:"HTMLPurifier_ConfigSchema":3:{s:8:"defaults";a:12:{s:4:"Attr";a:11:{s:19:"AllowedFrameTargets";a:0:{}s:10:"AllowedRel";a:0:{}s:10:"AllowedRev";a:0:{}s:19:"DefaultInvalidImage";s:0:"";s:22:"DefaultInvalidImageAlt";s:13:"Invalid image";s:14:"DefaultTextDir";s:3:"ltr";s:8:"EnableID";b:0;s:11:"IDBlacklist";a:0:{}s:17:"IDBlacklistRegexp";N;s:8:"IDPrefix";s:0:"";s:13:"IDPrefixLocal";s:0:"";}s:10:"AutoFormat";a:4:{s:13:"AutoParagraph";b:0;s:6:"Custom";a:0:{}s:7:"Linkify";b:0;s:15:"PurifierLinkify";b:0;}s:15:"AutoFormatParam";a:1:{s:21:"PurifierLinkifyDocURL";s:3:"#%s";}s:3:"CSS";a:5:{s:14:"AllowImportant";b:0;s:11:"AllowTricky";b:0;s:17:"AllowedProperties";N;s:13:"DefinitionRev";i:1;s:11:"Proprietary";b:0;}s:5:"Cache";a:2:{s:14:"DefinitionImpl";s:10:"Serializer";s:14:"SerializerPath";N;}s:4:"Core";a:15:{s:17:"AggressivelyFixLt";b:0;s:13:"CollectErrors";b:0;s:13:"ColorKeywords";a:17:{s:6:"maroon";s:7:"#800000";s:3:"red";s:7:"#FF0000";s:6:"orange";s:7:"#FFA500";s:6:"yellow";s:7:"#FFFF00";s:5:"olive";s:7:"#808000";s:6:"purple";s:7:"#800080";s:7:"fuchsia";s:7:"#FF00FF";s:5:"white";s:7:"#FFFFFF";s:4:"lime";s:7:"#00FF00";s:5:"green";s:7:"#008000";s:4:"navy";s:7:"#000080";s:4:"blue";s:7:"#0000FF";s:4:"aqua";s:7:"#00FFFF";s:4:"teal";s:7:"#008080";s:5:"black";s:7:"#000000";s:6:"silver";s:7:"#C0C0C0";s:4:"gray";s:7:"#808080";}s:25:"ConvertDocumentToFragment";b:1;s:31:"DirectLexLineNumberSyncInterval";i:0;s:8:"Encoding";s:5:"utf-8";s:21:"EscapeInvalidChildren";b:0;s:17:"EscapeInvalidTags";b:0;s:24:"EscapeNonASCIICharacters";b:0;s:14:"HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:8:"Language";s:2:"en";s:9:"LexerImpl";N;s:19:"MaintainLineNumbers";N;s:16:"RemoveInvalidImg";b:1;s:20:"RemoveScriptContents";N;}s:6:"Filter";a:3:{s:6:"Custom";a:0:{}s:18:"ExtractStyleBlocks";b:0;s:7:"YouTube";b:0;}s:11:"FilterParam";a:3:{s:26:"ExtractStyleBlocksEscaping";b:1;s:23:"ExtractStyleBlocksScope";N;s:26:"ExtractStyleBlocksTidyImpl";N;}s:4:"HTML";a:20:{s:7:"Allowed";N;s:17:"AllowedAt
tributes";N;s:15:"AllowedElements";N;s:14:"AllowedModules";N;s:12:"BlockWrapper";s:1:"p";s:11:"CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:13:"CustomDoctype";N;s:12:"DefinitionID";N;s:13:"DefinitionRev";i:1;s:7:"Doctype";N;s:19:"ForbiddenAttributes";a:0:{}s:17:"ForbiddenElements";a:0:{}s:6:"Parent";s:3:"div";s:11:"Proprietary";b:0;s:6:"Strict";b:0;s:7:"TidyAdd";a:0:{}s:9:"TidyLevel";s:6:"medium";s:10:"TidyRemove";a:0:{}s:7:"Trusted";b:0;s:5:"XHTML";b:1;}s:6:"Output";a:3:{s:21:"CommentScriptContents";b:1;s:7:"Newline";N;s:10:"TidyFormat";b:0;}s:4:"Test";a:1:{s:12:"ForceNoIconv";b:0;}s:3:"URI";a:14:{s:14:"AllowedSchemes";a:6:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;}s:4:"Base";N;s:13:"DefaultScheme";s:4:"http";s:12:"DefinitionID";N;s:13:"DefinitionRev";i:1;s:7:"Disable";b:0;s:15:"DisableExternal";b:0;s:24:"DisableExternalResources";b:0;s:16:"DisableResources";b:0;s:4:"Host";N;s:13:"HostBlacklist";a:0:{}s:12:"MakeAbsolute";b:0;s:5:"Munge";N;s:22:"OverrideAllowedSchemes";b:1;}}s:4:"info";a:12:{s:4:"Attr";a:12:{s:19:"AllowedFrameTargets";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:10:"AllowedRel";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:10:"AllowedRev";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:19:"DefaultInvalidImage";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:22:"DefaultInvalidImageAlt";O:32:"HTMLPurifier_ConfigDef_Directive":5:
{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:14:"DefaultTextDir";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";a:2:{s:3:"ltr";b:1;s:3:"rtl";b:1;}s:7:"aliases";a:0:{}}s:8:"EnableID";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:11:"IDBlacklist";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"list";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:17:"IDBlacklistRegexp";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:8:"IDPrefix";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"IDPrefixLocal";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:10:"DisableURI";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:3:"URI";s:4:"name";s:7:"Disable";}}s:10:"AutoFormat";a:4:{s:13:"AutoParagraph";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:6:"Custom";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"list";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"Linkify";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:15:"PurifierLinkify";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_nul
l";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:15:"AutoFormatParam";a:1:{s:21:"PurifierLinkifyDocURL";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:3:"CSS";a:5:{s:14:"AllowImportant";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:11:"AllowTricky";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:17:"AllowedProperties";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"DefinitionRev";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:3:"int";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:11:"Proprietary";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:5:"Cache";a:2:{s:14:"DefinitionImpl";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:14:"SerializerPath";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:4:"Core";a:20:{s:15:"DefinitionCache";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:5:"Cache";s:4:"name";s:14:"DefinitionImpl";}s:17:"AggressivelyFixLt";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"CollectErrors";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:
"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"ColorKeywords";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"hash";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:25:"ConvertDocumentToFragment";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:19:"AcceptFullDocuments";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:4:"Core";s:4:"name";s:25:"ConvertDocumentToFragment";}s:31:"DirectLexLineNumberSyncInterval";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:3:"int";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:8:"Encoding";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:7:"istring";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:21:"EscapeInvalidChildren";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:17:"EscapeInvalidTags";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:24:"EscapeNonASCIICharacters";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:14:"HiddenElements";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:8:"Language";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:9:"LexerImpl";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:5:"mixed";s:10:"allow_null";b:1;s:7:"allowed";b:1;
s:7:"aliases";a:0:{}}s:19:"MaintainLineNumbers";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:16:"RemoveInvalidImg";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:20:"RemoveScriptContents";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:5:"XHTML";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:4:"HTML";s:4:"name";s:5:"XHTML";}s:21:"CommentScriptContents";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:6:"Output";s:4:"name";s:21:"CommentScriptContents";}s:10:"TidyFormat";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:6:"Output";s:4:"name";s:10:"TidyFormat";}}s:6:"Filter";a:5:{s:6:"Custom";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"list";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:18:"ExtractStyleBlocks";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"YouTube";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:26:"ExtractStyleBlocksEscaping";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:11:"FilterParam";s:4:"name";s:26:"ExtractStyleBlocksEscaping";}s:23:"ExtractStyleBlocksScope";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:11:"FilterParam";s:4:"name";s:23:"ExtractStyleBlocksScope";}}s:11:"FilterParam";a:3:{s:26:"ExtractStyleBlocksEscaping";O:32:"HTMLPurifier_Con
figDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:23:"ExtractStyleBlocksScope";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:26:"ExtractStyleBlocksTidyImpl";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:5:"mixed";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:4:"HTML";a:21:{s:12:"EnableAttrID";O:37:"HTMLPurifier_ConfigDef_DirectiveAlias":3:{s:5:"class";s:5:"alias";s:9:"namespace";s:4:"Attr";s:4:"name";s:8:"EnableID";}s:7:"Allowed";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:5:"itext";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:17:"AllowedAttributes";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:15:"AllowedElements";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:14:"AllowedModules";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:12:"BlockWrapper";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:11:"CoreModules";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"CustomDoctype";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:12:"DefinitionID";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:
4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"DefinitionRev";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:3:"int";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"Doctype";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";a:5:{s:22:"HTML 4.01 Transitional";b:1;s:16:"HTML 4.01 Strict";b:1;s:22:"XHTML 1.0 Transitional";b:1;s:16:"XHTML 1.0 Strict";b:1;s:9:"XHTML 1.1";b:1;}s:7:"aliases";a:0:{}}s:19:"ForbiddenAttributes";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:17:"ForbiddenElements";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:6:"Parent";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:11:"Proprietary";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:6:"Strict";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"TidyAdd";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:9:"TidyLevel";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";a:4:{s:4:"none";b:1;s:5:"light";b:1;s:6:"medium";b:1;s:5:"heavy";b:1;}s:7:"aliases";a:0:{}}s:10:"TidyRemove";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowe
d";b:1;s:7:"aliases";a:0:{}}s:7:"Trusted";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:5:"XHTML";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:6:"Output";a:3:{s:21:"CommentScriptContents";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"Newline";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:10:"TidyFormat";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:4:"Test";a:1:{s:12:"ForceNoIconv";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}s:3:"URI";a:14:{s:14:"AllowedSchemes";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"lookup";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:4:"Base";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"DefaultScheme";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:12:"DefinitionID";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"DefinitionRev";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:3:"int";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:7:"Disable";
O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:15:"DisableExternal";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:24:"DisableExternalResources";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:16:"DisableResources";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:4:"Host";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:13:"HostBlacklist";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"list";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:12:"MakeAbsolute";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:5:"Munge";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:6:"string";s:10:"allow_null";b:1;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}s:22:"OverrideAllowedSchemes";O:32:"HTMLPurifier_ConfigDef_Directive":5:{s:5:"class";s:9:"directive";s:4:"type";s:4:"bool";s:10:"allow_null";b:0;s:7:"allowed";b:1;s:7:"aliases";a:0:{}}}}s:9:"\0*\0parser";O:31:"HTMLPurifier_VarParser_Flexible":0:{}}
\ No newline at end of file
--- /dev/null
+Attr.AllowedFrameTargets\r
+TYPE: lookup\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+Lookup table of all allowed link frame targets. Some commonly used link\r
+targets include _blank, _self, _parent and _top. Values should be\r
+lowercase, as validation will be done in a case-sensitive manner despite\r
+W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute\r
+so this directive will have no effect in that doctype. XHTML 1.1 does not\r
+enable the Target module by default, you will have to manually enable it\r
+(see the module documentation for more details.)\r
--- /dev/null
+Attr.AllowedRel\r
+TYPE: lookup\r
+VERSION: 1.6.0\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+List of allowed forward document relationships in the rel attribute. Common\r
+values may be nofollow or print. By default, this is empty, meaning that no\r
+document relationships are allowed. \r
--- /dev/null
+Attr.AllowedRev\r
+TYPE: lookup\r
+VERSION: 1.6.0\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+List of allowed reverse document relationships in the rev attribute. This\r
+attribute is a bit of an edge-case; if you don't know what it is for, stay\r
+away. \r
--- /dev/null
+Attr.DefaultInvalidImage\r
+TYPE: string\r
+DEFAULT: ''\r
+--DESCRIPTION--\r
+This is the default image an img tag will be pointed to if it does not have\r
+a valid src attribute. In future versions, we may allow the image tag to\r
+be removed completely, but due to design issues, this is not possible right\r
+now.\r
--- /dev/null
+Attr.DefaultInvalidImageAlt\r
+TYPE: string\r
+DEFAULT: 'Invalid image'\r
+--DESCRIPTION--\r
+This is the content of the alt attribute of an invalid image if the user had not\r
+previously specified an alt attribute. It has no effect when the image is\r
+valid but there was no alt attribute present.\r
--- /dev/null
+Attr.DefaultTextDir\r
+TYPE: string\r
+DEFAULT: 'ltr'\r
+--DESCRIPTION--\r
+Defines the default text direction (ltr or rtl) of the document being\r
+parsed. This generally is the same as the value of the dir attribute in\r
+HTML, or ltr if that is not specified.\r
+--ALLOWED--\r
+'ltr', 'rtl'\r
--- /dev/null
+Attr.EnableID\r
+TYPE: bool\r
+DEFAULT: false\r
+VERSION: 1.2.0\r
+--DESCRIPTION--\r
+Allows the ID attribute in HTML. This is disabled by default due to the\r
+fact that without proper configuration user input can easily break the\r
+validation of a webpage by specifying an ID that is already on the\r
+surrounding HTML. If you don't mind throwing caution to the wind, enable\r
+this directive, but I strongly recommend you also consider blacklisting IDs\r
+you use (%Attr.IDBlacklist) or prefixing all user supplied IDs\r
+(%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of\r
+pre-1.2.0 versions.\r
+--ALIASES--\r
+HTML.EnableAttrID\r
--- /dev/null
+Attr.IDBlacklist\r
+TYPE: list\r
+DEFAULT: array()\r
+DESCRIPTION: Array of IDs not allowed in the document.\r
--- /dev/null
+Attr.IDBlacklistRegexp\r
+TYPE: string/null\r
+VERSION: 1.6.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+PCRE regular expression to be matched against all IDs. If the expression\r
+matches, the ID is rejected. Use this with care: may cause significant\r
+degradation. ID matching is done after all other validation. \r
--- /dev/null
+Attr.IDPrefix\r
+TYPE: string\r
+VERSION: 1.2.0\r
+DEFAULT: ''\r
+--DESCRIPTION--\r
+String to prefix to IDs. If you have no idea what IDs your pages may use,\r
+you may opt to simply add a prefix to all user-submitted ID attributes so\r
+that they are still usable, but will not conflict with core page IDs.\r
+Example: setting the directive to 'user_' will cause a user-submitted\r
+'foo' to become 'user_foo'. Be sure to set %HTML.EnableAttrID to true\r
+before using this. \r
--- /dev/null
+Attr.IDPrefixLocal\r
+TYPE: string\r
+VERSION: 1.2.0\r
+DEFAULT: ''\r
+--DESCRIPTION--\r
+Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you\r
+need to allow multiple sets of user content on a web page, you may need to\r
+have a separate prefix that changes with each iteration. This way,\r
+separately submitted pieces of user content displayed on the same page don't\r
+clobber each other. Ideal values are unique identifiers for the content it\r
+represents (i.e. the id of the row in the database). Be sure to add a\r
+separator (like an underscore) at the end. Warning: this directive will\r
+not work unless %Attr.IDPrefix is set to a non-empty value! \r
--- /dev/null
+Attr\r
+DESCRIPTION: Features regarding attribute validation.\r
--- /dev/null
+AutoFormat.AutoParagraph\r
+TYPE: bool\r
+VERSION: 2.0.1\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This directive turns on auto-paragraphing, where double newlines are\r
+ converted into paragraphs whenever possible. Auto-paragraphing:\r
+</p>\r
+<ul>\r
+ <li>Always applies to inline elements or text in the root node,</li>\r
+ <li>Applies to inline elements or text with double newlines in nodes\r
+ that allow paragraph tags,</li>\r
+ <li>Applies to double newlines in paragraph tags</li>\r
+</ul>\r
+<p>\r
+ <code>p</code> tags must be allowed for this directive to take effect.\r
+ We do not use <code>br</code> tags for paragraphing, as that is\r
+ semantically incorrect.\r
+</p>\r
+<p>\r
+ To prevent auto-paragraphing as a content-producer, refrain from using\r
+ double-newlines except to specify a new paragraph or in contexts where\r
+ it has special meaning (whitespace usually has no meaning except in\r
+ tags like <code>pre</code>, so this should not be difficult.) To prevent\r
+ the paragraphing of inline text adjacent to block elements, wrap them\r
+ in <code>div</code> tags (the behavior is slightly different outside of\r
+ the root node.)\r
+</p>\r
--- /dev/null
+AutoFormat.Custom\r
+TYPE: list\r
+VERSION: 2.0.1\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This directive can be used to add custom auto-format injectors.\r
+ Specify an array of injector names (class name minus the prefix)\r
+ or concrete implementations. Injector class must exist. \r
+</p>\r
+\r
--- /dev/null
+AutoFormat.Linkify\r
+TYPE: bool\r
+VERSION: 2.0.1\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This directive turns on linkification, auto-linking http, ftp and\r
+ https URLs. <code>a</code> tags with the <code>href</code> attribute\r
+ must be allowed. \r
+</p>\r
+\r
--- /dev/null
+AutoFormat.PurifierLinkify\r
+TYPE: bool\r
+VERSION: 2.0.1\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Internal auto-formatter that converts configuration directives in\r
+ syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags\r
+ with the <code>href</code> attribute must be allowed.\r
+</p>\r
+\r
--- /dev/null
+AutoFormat\r
+DESCRIPTION: Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)\r
--- /dev/null
+AutoFormatParam.PurifierLinkifyDocURL\r
+TYPE: string\r
+VERSION: 2.0.1\r
+DEFAULT: '#%s'\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Location of configuration documentation to link to, let %s substitute\r
+ into the configuration's namespace and directive names sans the percent\r
+ sign. \r
+</p>\r
+\r
--- /dev/null
+AutoFormatParam\r
+DESCRIPTION: Configuration for customizing auto-formatting functionality\r
--- /dev/null
+CSS.AllowImportant
+TYPE: bool
+DEFAULT: false
+VERSION: 3.1.0
+--DESCRIPTION--
+This parameter determines whether or not !important cascade modifiers should
+be allowed in user CSS. If false, !important will be stripped.
--- /dev/null
+CSS.AllowTricky
+TYPE: bool
+DEFAULT: false
+VERSION: 3.1.0
+--DESCRIPTION--
+This parameter determines whether or not to allow "tricky" CSS properties and
+values. Tricky CSS properties/values can drastically modify page layout or
+be used for deceptive practices but do not directly constitute a security risk.
+For example, <code>display:none;</code> is considered a tricky property that
+will only be allowed if this directive is set to true.
--- /dev/null
+CSS.AllowedProperties
+TYPE: lookup/null
+VERSION: 3.1.0
+DEFAULT: NULL
+--DESCRIPTION--
+
+<p>
+ If HTML Purifier's set of style properties is unsatisfactory for your needs,
+ you can overload it with your own list of properties to allow. Note that this
+ method is subtractive: it does its job by taking away from HTML Purifier's
+ usual feature set, so you cannot add a property that HTML Purifier never
+ supported in the first place.
+</p>
+<p>
+ <strong>Warning:</strong> If another directive conflicts with the
+ elements here, <em>that</em> directive will win and override.
+</p>
--- /dev/null
+CSS.DefinitionRev\r
+TYPE: int\r
+VERSION: 2.0.0\r
+DEFAULT: 1\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Revision identifier for your custom definition. See\r
+ %HTML.DefinitionRev for details. \r
+</p>\r
+\r
--- /dev/null
+CSS.Proprietary\r
+TYPE: bool\r
+VERSION: 3.0.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Whether or not to allow safe, proprietary CSS values. \r
+</p>\r
+\r
--- /dev/null
+CSS\r
+DESCRIPTION: Configuration regarding allowed CSS.\r
--- /dev/null
+Cache.DefinitionImpl\r
+TYPE: string/null\r
+VERSION: 2.0.0\r
+DEFAULT: 'Serializer'\r
+--DESCRIPTION--\r
+\r
+This directive defines which method to use when caching definitions,\r
+the complex data-type that makes HTML Purifier tick. Set to null\r
+to disable caching (not recommended, as you will see a definite\r
+performance degradation). \r
+\r
+--ALIASES--\r
+Core.DefinitionCache\r
--- /dev/null
+Cache.SerializerPath\r
+TYPE: string/null\r
+VERSION: 2.0.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Absolute path with no trailing slash to store serialized definitions in.\r
+ Default is within the\r
+ HTML Purifier library inside DefinitionCache/Serializer. This\r
+ path must be writable by the webserver. \r
+</p>\r
+\r
--- /dev/null
+Cache\r
+DESCRIPTION: Configuration for DefinitionCache and related subclasses.\r
--- /dev/null
+Core.AggressivelyFixLt\r
+TYPE: bool\r
+VERSION: 2.1.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+This directive enables aggressive pre-filter fixes HTML Purifier can\r
+perform in order to ensure that open angled-brackets do not get killed\r
+during parsing stage. Enabling this will result in two preg_replace_callback\r
+calls and one preg_replace call for every bit of HTML passed through here.\r
+It is not necessary and will have no effect for PHP 4.\r
+\r
+\r
--- /dev/null
+Core.CollectErrors\r
+TYPE: bool\r
+VERSION: 2.0.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+Whether or not to collect errors found while filtering the document. This\r
+is a useful way to give feedback to your users. <strong>Warning:</strong>\r
+Currently this feature is very patchy and experimental, with lots of\r
+possible error messages not yet implemented. It will not cause any\r
+problems, but it may not help your users either. \r
--- /dev/null
+Core.ColorKeywords\r
+TYPE: hash\r
+VERSION: 2.0.0\r
+--DEFAULT--\r
+array (\r
+ 'maroon' => '#800000',\r
+ 'red' => '#FF0000',\r
+ 'orange' => '#FFA500',\r
+ 'yellow' => '#FFFF00',\r
+ 'olive' => '#808000',\r
+ 'purple' => '#800080',\r
+ 'fuchsia' => '#FF00FF',\r
+ 'white' => '#FFFFFF',\r
+ 'lime' => '#00FF00',\r
+ 'green' => '#008000',\r
+ 'navy' => '#000080',\r
+ 'blue' => '#0000FF',\r
+ 'aqua' => '#00FFFF',\r
+ 'teal' => '#008080',\r
+ 'black' => '#000000',\r
+ 'silver' => '#C0C0C0',\r
+ 'gray' => '#808080',\r
+)\r
+--DESCRIPTION--\r
+\r
+Lookup array of color names to six digit hexadecimal number corresponding\r
+to color, with preceding hash mark. Used when parsing colors.\r
+\r
+\r
--- /dev/null
+Core.ConvertDocumentToFragment\r
+TYPE: bool\r
+DEFAULT: true\r
+--DESCRIPTION--\r
+\r
+This parameter determines whether or not the filter should convert\r
+input that is a full document with html and body tags to a fragment\r
+of just the contents of a body tag. This parameter is simply something\r
+HTML Purifier can do during an edge-case: for most inputs, this\r
+processing is not necessary.\r
+\r
+--ALIASES--\r
+Core.AcceptFullDocuments\r
--- /dev/null
+Core.DirectLexLineNumberSyncInterval\r
+TYPE: int\r
+VERSION: 2.0.0\r
+DEFAULT: 0\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Specifies the number of tokens the DirectLex line number tracking\r
+ implementations should process before attempting to resynchronize the\r
+ current line count by manually counting all previous new-lines. When\r
+ at 0, this functionality is disabled. Lower values will decrease\r
+ performance, and this is only strictly necessary if the counting\r
+ algorithm is buggy (in which case you should report it as a bug).\r
+ This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is\r
+ not being used. \r
+</p>\r
+\r
--- /dev/null
+Core.Encoding\r
+TYPE: istring\r
+DEFAULT: 'utf-8'\r
+--DESCRIPTION--\r
+If for some reason you are unable to convert all webpages to UTF-8, you can\r
+use this directive as a stop-gap compatibility change to let HTML Purifier\r
+deal with non UTF-8 input. This technique has notable deficiencies:\r
+absolutely no characters outside of the selected character encoding will be\r
+preserved, not even the ones that have been ampersand escaped (this is due\r
+to a UTF-8 specific <em>feature</em> that automatically resolves all\r
+entities), making it pretty useless for anything except the most I18N-blind\r
+applications, although %Core.EscapeNonASCIICharacters offers a fix for this\r
+trouble with another tradeoff. This directive only accepts ISO-8859-1 if\r
+iconv is not enabled.\r
--- /dev/null
+Core.EscapeInvalidChildren\r
+TYPE: bool\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+When true, a child that is not allowed in the context of the\r
+parent element will be transformed into text as if it were ASCII. When\r
+false, that element and all internal tags will be dropped, though text will\r
+be preserved. There is no option for dropping the element but preserving\r
+child nodes.\r
--- /dev/null
+Core.EscapeInvalidTags\r
+TYPE: bool\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+When true, invalid tags will be written back to the document as plain text.\r
+Otherwise, they are silently dropped.\r
--- /dev/null
+Core.EscapeNonASCIICharacters\r
+TYPE: bool\r
+VERSION: 1.4.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+This directive overcomes a deficiency in %Core.Encoding by blindly\r
+converting all non-ASCII characters into decimal numeric entities before\r
+converting it to its native encoding. This means that even characters that\r
+can be expressed in the non-UTF-8 encoding will be entity-ized, which can\r
+be a real downer for encodings like Big5. It also assumes that the ASCII\r
+repertoire is available, although this is the case for almost all encodings.\r
+Anyway, use UTF-8! \r
--- /dev/null
+Core.HiddenElements\r
+TYPE: lookup\r
+--DEFAULT--\r
+array (\r
+ 'script' => true,\r
+ 'style' => true,\r
+)\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This directive is a lookup array of elements which should have their\r
+ contents removed when they are not allowed by the HTML definition.\r
+ For example, the contents of a <code>script</code> tag are not \r
+ normally shown in a document, so if script tags are to be removed,\r
+ their contents should be removed too. This is opposed to a <code>b</code>\r
+ tag, which defines some presentational changes but does not hide its\r
+ contents.\r
+</p>\r
+\r
--- /dev/null
+Core.Language\r
+TYPE: string\r
+VERSION: 2.0.0\r
+DEFAULT: 'en'\r
+--DESCRIPTION--\r
+\r
+ISO 639 language code for localizable things in HTML Purifier to use,\r
+which is mainly error reporting. There is currently only an English (en)\r
+translation, so this directive is currently useless.\r
+\r
+\r
--- /dev/null
+Core.LexerImpl\r
+TYPE: mixed/null\r
+VERSION: 2.0.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This parameter determines what lexer implementation can be used. The\r
+ valid values are:\r
+</p>\r
+<dl>\r
+ <dt><em>null</em></dt>\r
+ <dd>\r
+ Recommended, the lexer implementation will be auto-detected based on\r
+ your PHP-version and configuration.\r
+ </dd>\r
+ <dt><em>string</em> lexer identifier</dt>\r
+ <dd>\r
+ This is a slim way of manually overriding the implementation.\r
+ Currently recognized values are: DOMLex (the default PHP5 implementation)\r
+ and DirectLex (the default PHP4 implementation). Only use this if\r
+ you know what you are doing: usually, the auto-detection will\r
+ manage things for cases you aren't even aware of.\r
+ </dd>\r
+ <dt><em>object</em> lexer instance</dt>\r
+ <dd>\r
+ Super-advanced: you can specify your own, custom, implementation that\r
+ implements the interface defined by <code>HTMLPurifier_Lexer</code>.\r
+ I may remove this option simply because I don't expect anyone\r
+ to use it.\r
+ </dd>\r
+</dl>\r
--- /dev/null
+Core.MaintainLineNumbers\r
+TYPE: bool/null\r
+VERSION: 2.0.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ If true, HTML Purifier will add line number information to all tokens.\r
+ This is useful when error reporting is turned on, but can result in\r
+ significant performance degradation and should not be used when\r
+ unnecessary. This directive must be used with the DirectLex lexer,\r
+ as the DOMLex lexer does not (yet) support this functionality. \r
+ If the value is null, an appropriate value will be selected based\r
+ on other configuration. \r
+</p>\r
+\r
--- /dev/null
+Core.RemoveInvalidImg\r
+TYPE: bool\r
+DEFAULT: true\r
+VERSION: 1.3.0\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This directive enables pre-emptive URI checking in <code>img</code> \r
+ tags, as the attribute validation strategy is not authorized to \r
+ remove elements from the document. Revert to pre-1.3.0 behavior by setting to false.\r
+</p>\r
+\r
--- /dev/null
+Core.RemoveScriptContents\r
+TYPE: bool/null\r
+DEFAULT: NULL\r
+VERSION: 2.0.0\r
+DEPRECATED-VERSION: 2.1.0\r
+DEPRECATED-USE: Core.HiddenElements\r
+--DESCRIPTION--\r
+<p>\r
+ This directive enables HTML Purifier to remove not only script tags\r
+ but all of their contents.\r
+</p>\r
--- /dev/null
+Core\r
+DESCRIPTION: Core features that are always available.\r
--- /dev/null
+Filter.Custom
+TYPE: list
+VERSION: 3.1.0
+DEFAULT: array()
+--DESCRIPTION--
+<p>
+ This directive can be used to add custom filters; it is nearly the
+ equivalent of the now deprecated <code>HTMLPurifier->addFilter()</code>
+ method. Specify an array of concrete implementations.
+</p>
--- /dev/null
+Filter.ExtractStyleBlocks
+TYPE: bool
+VERSION: 3.1.0
+DEFAULT: false
+EXTERNAL: CSSTidy
+--DESCRIPTION--
+<p>
+ This directive turns on the style block extraction filter, which removes
+ <code>style</code> blocks from input HTML, cleans them up with CSSTidy,
+ and places them in the <code>StyleBlocks</code> context variable, for further
+ use by you, usually to be placed in an external stylesheet, or a
+ <code>style</code> block in the <code>head</code> of your document.
+</p>
+<p>
+ Sample usage:
+</p>
+<pre><![CDATA[$config = HTMLPurifier_Config::createDefault();
+$config->set('Filter', 'ExtractStyleBlocks', true);
+$purifier = new HTMLPurifier($config);
+$styles = $purifier->context->get('StyleBlocks');
+foreach ($styles as $style) {
+ echo '<style type="text/css">' . $style . "</style>\n";
+}]]></pre>
+<p>
+ <strong>Warning:</strong> It is possible for a user to mount an
+ imagecrash attack using this CSS. Counter-measures are difficult;
+ it is not simply enough to limit the range of CSS lengths (using
+ relative lengths with many nesting levels allows for large values
+ to be attained without actually specifying them in the stylesheet),
+ and the flexible nature of selectors makes it difficult to selectively
+ disable lengths on image tags (HTML Purifier, however, does disable
+ CSS width and height in inline styling). There are probably two effective
+ counter measures: an explicit width and height set to auto in all
+ images in your document (unlikely) or the disabling of width and
+ height (somewhat reasonable). Whether or not these measures should be
+ used is left to the reader.
+</p>
--- /dev/null
+Filter.YouTube
+TYPE: bool
+VERSION: 3.1.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ This directive enables YouTube video embedding in HTML Purifier. Check
+ <a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document
+ on embedding videos</a> for more information on what this filter does.
+</p>
--- /dev/null
+Filter\r
+DESCRIPTION: Directives for turning filters on and off, or specifying custom filters.\r
--- /dev/null
+FilterParam.ExtractStyleBlocksEscaping\r
+TYPE: bool\r
+VERSION: 3.0.0\r
+DEFAULT: true\r
+ALIASES: Filter.ExtractStyleBlocksEscaping\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Whether or not to escape the dangerous characters <, > and &\r
+ as \3C, \3E and \26, respectively. This can be safely set to false\r
+ if the contents of StyleBlocks will be placed in an external stylesheet,\r
+ where there is no risk of it being interpreted as HTML. \r
+</p>\r
+\r
--- /dev/null
+FilterParam.ExtractStyleBlocksScope\r
+TYPE: string/null\r
+VERSION: 3.0.0\r
+DEFAULT: NULL\r
+ALIASES: Filter.ExtractStyleBlocksScope\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ If you would like users to be able to define external stylesheets, but\r
+ only allow them to specify CSS declarations for a specific node and\r
+ prevent them from fiddling with other elements, use this directive.\r
+ It accepts any valid CSS selector, and will prepend this to any\r
+ CSS declaration extracted from the document. For example, if this\r
+ directive is set to <code>#user-content</code> and a user uses the\r
+ selector <code>a:hover</code>, the final selector will be\r
+ <code>#user-content a:hover</code>.\r
+</p>\r
+<p>\r
+ The comma shorthand may be used; consider the above example, with\r
+ <code>#user-content, #user-content2</code>, the final selector will\r
+ be <code>#user-content a:hover, #user-content2 a:hover</code>.\r
+</p>\r
+<p>\r
+ <strong>Warning:</strong> It is possible for users to bypass this measure\r
+ using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML\r
+ Purifier, and I am working to get it fixed. Until then, HTML Purifier\r
+ performs a basic check to prevent this.\r
+</p>\r
--- /dev/null
+FilterParam.ExtractStyleBlocksTidyImpl
+TYPE: mixed/null
+VERSION: 3.1.0
+DEFAULT: NULL
+--DESCRIPTION--
+<p>
+ If left NULL, HTML Purifier will attempt to instantiate a <code>csstidy</code>
+ class to use for internal cleaning. This will usually be good enough.
+</p>
+<p>
+ However, for trusted user input, you can set this to <code>false</code> to
+ disable cleaning. In addition, you can supply your own concrete implementation
+ of Tidy's interface to use, although I don't know why you'd want to do that.
+</p>
--- /dev/null
+FilterParam
+DESCRIPTION: Configuration for filters.
--- /dev/null
+HTML.Allowed\r
+TYPE: itext/null\r
+VERSION: 2.0.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ This is a convenience directive that rolls the functionality of\r
+ %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.\r
+ Specify elements and attributes that are allowed using:\r
+ <code>element1[attr1|attr2],element2...</code>. You can also use\r
+ newlines instead of commas to separate elements.\r
+</p>\r
+<p>\r
+ <strong>Warning</strong>:\r
+ All of the constraints on the component directives are still enforced.\r
+ The syntax is a <em>subset</em> of TinyMCE's <code>valid_elements</code>\r
+ whitelist: directly copy-pasting it here will probably result in\r
+ broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes\r
+ are set, this directive has no effect.\r
+</p>\r
+\r
--- /dev/null
+HTML.AllowedAttributes\r
+TYPE: lookup/null\r
+VERSION: 1.3.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ If HTML Purifier's attribute set is unsatisfactory, overload it! \r
+ The syntax is "tag.attr" or "*.attr" for the global attributes \r
+ (style, id, class, dir, lang, xml:lang).\r
+</p>\r
+<p>\r
+ <strong>Warning:</strong> If another directive conflicts with the \r
+ elements here, <em>that</em> directive will win and override. For \r
+ example, %HTML.EnableAttrID will take precedence over *.id in this \r
+ directive. You must set that directive to true before you can use \r
+ IDs at all. \r
+</p>\r
+\r
--- /dev/null
+HTML.AllowedElements\r
+TYPE: lookup/null\r
+VERSION: 1.3.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+<p>\r
+ If HTML Purifier's tag set is unsatisfactory for your needs, you \r
+ can overload it with your own list of tags to allow. Note that this \r
+ method is subtractive: it does its job by taking away from HTML Purifier's \r
+ usual feature set, so you cannot add a tag that HTML Purifier never \r
+ supported in the first place (like embed, form or head). If you \r
+ change this, you probably also want to change %HTML.AllowedAttributes.\r
+</p>\r
+<p>\r
+ <strong>Warning:</strong> If another directive conflicts with the \r
+ elements here, <em>that</em> directive will win and override. \r
+</p>\r
+\r
--- /dev/null
+HTML.AllowedModules\r
+TYPE: lookup/null\r
+VERSION: 2.0.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ A doctype comes with a set of usual modules to use. Without having\r
+ to muck about with the doctypes, you can quickly activate or\r
+ disable these modules by specifying which modules you wish to allow\r
+ with this directive. This is most useful for unit testing specific\r
+ modules, although end users may find it useful for their own ends.\r
+</p>\r
+<p>\r
+ If you specify a module that does not exist, the manager will silently\r
+ fail to use it, so be careful! User-defined modules are not affected\r
+ by this directive. Modules defined in %HTML.CoreModules are not\r
+ affected by this directive. \r
+</p>\r
+\r
--- /dev/null
+HTML.BlockWrapper\r
+TYPE: string\r
+VERSION: 1.3.0\r
+DEFAULT: 'p'\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ String name of element to wrap inline elements that are inside a block\r
+ context. This only occurs in the children of blockquote in strict mode.\r
+</p>\r
+<p>\r
+ Example: with the default value,\r
+ <code><blockquote>Foo</blockquote></code> would become\r
+ <code><blockquote><p>Foo</p></blockquote></code>.\r
+ The <code><p></code> tags can be replaced with whatever you desire,\r
+ as long as it is a block level element. \r
+</p>\r
+\r
--- /dev/null
+HTML.CoreModules\r
+TYPE: lookup\r
+VERSION: 2.0.0\r
+--DEFAULT--\r
+array (\r
+ 'Structure' => true,\r
+ 'Text' => true,\r
+ 'Hypertext' => true,\r
+ 'List' => true,\r
+ 'NonXMLCommonAttributes' => true,\r
+ 'XMLCommonAttributes' => true,\r
+ 'CommonAttributes' => true,\r
+)\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Certain modularized doctypes (XHTML, namely), have certain modules\r
+ that must be included for the doctype to be a conforming document\r
+ type: put those modules here. By default, XHTML's core modules\r
+ are used. You can set this to a blank array to disable core module\r
+ protection, but this is not recommended. \r
+</p>\r
+\r
--- /dev/null
+HTML.CustomDoctype\r
+TYPE: string/null\r
+VERSION: 2.0.1\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+A custom doctype for power-users who defined their own document\r
+type. This directive only applies when %HTML.Doctype is blank.\r
+\r
+\r
--- /dev/null
+HTML.DefinitionID\r
+TYPE: string/null\r
+DEFAULT: NULL\r
+VERSION: 2.0.0\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Unique identifier for a custom-built HTML definition. If you edit\r
+ the raw version of the HTMLDefinition, introducing changes that the\r
+ configuration object does not reflect, you must specify this variable.\r
+ If you change your custom edits, you should change this directive, or\r
+ clear your cache. Example:\r
+</p>\r
+<pre>\r
+$config = HTMLPurifier_Config::createDefault();\r
+$config->set('HTML', 'DefinitionID', '1');\r
+$def = $config->getHTMLDefinition();\r
+$def->addAttribute('a', 'tabindex', 'Number');\r
+</pre>\r
+<p>\r
+ In the above example, the configuration is still at the defaults, but\r
+ using the advanced API, an extra attribute has been added. The\r
+ configuration object normally has no way of knowing that this change\r
+ has taken place, so it needs an extra directive: %HTML.DefinitionID.\r
+ If someone else attempts to use the default configuration, these two\r
+ pieces of code will not clobber each other in the cache, since one has\r
+ an extra directive attached to it.\r
+</p>\r
+<p>\r
+ You <em>must</em> specify a value to this directive to use the\r
+ advanced API features.\r
+</p>\r
+\r
--- /dev/null
+HTML.DefinitionRev\r
+TYPE: int\r
+VERSION: 2.0.0\r
+DEFAULT: 1\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Revision identifier for your custom definition specified in\r
+ %HTML.DefinitionID. This serves the same purpose: uniquely identifying\r
+ your custom definition, but this one does so in a chronological\r
+ context: revision 3 is more up-to-date than revision 2. Thus, when\r
+ this gets incremented, the cache handling is smart enough to clean\r
+ up any older revisions of your definition as well as flush the\r
+ cache. \r
+</p>\r
+\r
--- /dev/null
+HTML.Doctype\r
+TYPE: string/null\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+Doctype to use during filtering. Technically speaking this is not actually\r
+a doctype (as it does not identify a corresponding DTD), but we are using\r
+this name for sake of simplicity. When non-blank, this will override any\r
+older directives like %HTML.XHTML or %HTML.Strict.\r
+--ALLOWED--\r
+'HTML 4.01 Transitional', 'HTML 4.01 Strict', 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', 'XHTML 1.1'\r
--- /dev/null
+HTML.ForbiddenAttributes
+TYPE: lookup
+VERSION: 3.1.0
+DEFAULT: array()
+--DESCRIPTION--
+<p>
+ While this directive is similar to %HTML.AllowedAttributes, for
+ forwards-compatibility with XML, this attribute has a different syntax. Instead of
+ <code>tag.attr</code>, use <code>tag@attr</code>. To disallow <code>href</code>
+ attributes in <code>a</code> tags, set this directive to
+ <code>a@href</code>. You can also disallow an attribute globally with
+ <code>attr</code> or <code>*@attr</code> (either syntax is fine; the latter
+ is provided for consistency with %HTML.AllowedAttributes).
+</p>
+<p>
+ <strong>Warning:</strong> This directive complements %HTML.ForbiddenElements,
+ accordingly, check
+ out that directive for a discussion of why you
+ should think twice before using this directive.
+</p>
--- /dev/null
+HTML.ForbiddenElements
+TYPE: lookup
+VERSION: 3.1.0
+DEFAULT: array()
+--DESCRIPTION--
+<p>
+ This was, perhaps, the most requested feature ever in HTML
+ Purifier. Please don't abuse it! This is the logical inverse of
+ %HTML.AllowedElements, and it will override that directive, or any
+ other directive.
+</p>
+<p>
+ If possible, %HTML.Allowed is recommended over this directive, because it
+ can sometimes be difficult to tell whether or not you've forbidden all of
+ the behavior you would like to disallow. If you forbid <code>img</code>
+ with the expectation of preventing images on your site, you'll be in for
+ a nasty surprise when people start using the <code>background-image</code>
+ CSS property.
+</p>
--- /dev/null
+HTML.Parent\r
+TYPE: string\r
+VERSION: 1.3.0\r
+DEFAULT: 'div'\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ String name of element that HTML fragment passed to library will be \r
+ inserted in. An interesting variation would be using span as the \r
+ parent element, meaning that only inline tags would be allowed. \r
+</p>\r
+\r
--- /dev/null
+HTML.Proprietary
+TYPE: bool
+VERSION: 3.1.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ Whether or not to allow proprietary elements and attributes in your
+ documents, as per <code>HTMLPurifier_HTMLModule_Proprietary</code>.
+ <strong>Warning:</strong> This can cause your documents to stop
+ validating!
+</p>
--- /dev/null
+HTML.Strict\r
+TYPE: bool\r
+VERSION: 1.3.0\r
+DEFAULT: false\r
+DEPRECATED-VERSION: 1.7.0\r
+DEPRECATED-USE: HTML.Doctype\r
+--DESCRIPTION--\r
+Determines whether or not to use Transitional (loose) or Strict rulesets.\r
--- /dev/null
+HTML.TidyAdd\r
+TYPE: lookup\r
+VERSION: 2.0.0\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+\r
+Fixes to add to the default set of Tidy fixes as per your level. \r
+\r
--- /dev/null
+HTML.TidyLevel\r
+TYPE: string\r
+VERSION: 2.0.0\r
+DEFAULT: 'medium'\r
+--DESCRIPTION--\r
+\r
+<p>General level of cleanliness the Tidy module should enforce.\r
+There are four allowed values:</p>\r
+<dl>\r
+ <dt>none</dt>\r
+ <dd>No extra tidying should be done</dd>\r
+ <dt>light</dt>\r
+ <dd>Only fix elements that would be discarded otherwise due to\r
+ lack of support in doctype</dd>\r
+ <dt>medium</dt>\r
+ <dd>Enforce best practices</dd>\r
+ <dt>heavy</dt>\r
+ <dd>Transform all deprecated elements and attributes to standards\r
+ compliant equivalents</dd>\r
+</dl>\r
+\r
+--ALLOWED--\r
+'none', 'light', 'medium', 'heavy'\r
--- /dev/null
+HTML.TidyRemove\r
+TYPE: lookup\r
+VERSION: 2.0.0\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+\r
+Fixes to remove from the default set of Tidy fixes as per your level. \r
+\r
--- /dev/null
+HTML.Trusted\r
+TYPE: bool\r
+VERSION: 2.0.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+Indicates whether or not the user input is trusted. If the input is\r
+trusted, a more expansive set of allowed tags and attributes will be used. \r
--- /dev/null
+HTML.XHTML\r
+TYPE: bool\r
+DEFAULT: true\r
+VERSION: 1.1.0\r
+DEPRECATED-VERSION: 1.7.0\r
+DEPRECATED-USE: HTML.Doctype\r
+--DESCRIPTION--\r
+Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor.\r
+--ALIASES--\r
+Core.XHTML\r
--- /dev/null
+HTML\r
+DESCRIPTION: Configuration regarding allowed HTML.\r
--- /dev/null
+Output.CommentScriptContents\r
+TYPE: bool\r
+VERSION: 2.0.0\r
+DEFAULT: true\r
+--DESCRIPTION--\r
+Determines whether or not HTML Purifier should attempt to fix up the\r
+contents of script tags for legacy browsers with comments. \r
+--ALIASES--\r
+Core.CommentScriptContents\r
--- /dev/null
+Output.Newline\r
+TYPE: string/null\r
+VERSION: 2.0.1\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Newline string to format final output with. If left null, HTML Purifier\r
+ will auto-detect the default newline type of the system and use that;\r
+ you can manually override it here. Remember, \r\n is Windows, \r\r
+ is Mac, and \n is Unix. \r
+</p>\r
+\r
--- /dev/null
+Output.TidyFormat\r
+TYPE: bool\r
+VERSION: 1.1.1\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+<p>\r
+ Determines whether or not to run Tidy on the final output for pretty \r
+ formatting reasons, such as indentation and wrap.\r
+</p>\r
+<p>\r
+ This can greatly improve readability for editors who are hand-editing\r
+ the HTML, but is by no means necessary as HTML Purifier has already\r
+ fixed all major errors the HTML may have had. Tidy is a non-default\r
+ extension, and this directive will silently fail if Tidy is not\r
+ available.\r
+</p>\r
+<p>\r
+ If you are looking to make the overall look of your page's source\r
+ better, I recommend running Tidy on the entire page rather than just\r
+ user-content (after all, the indentation relative to the containing\r
+ blocks will be incorrect).\r
+</p>\r
+--ALIASES--\r
+Core.TidyFormat\r
--- /dev/null
+Output\r
+DESCRIPTION: Configuration relating to the generation of (X)HTML.\r
--- /dev/null
+Test.ForceNoIconv\r
+TYPE: bool\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+When set to true, HTMLPurifier_Encoder will act as if iconv does not exist\r
+and use only pure PHP implementations.\r
--- /dev/null
+Test\r
+DESCRIPTION: Developer testing configuration for our unit tests.\r
--- /dev/null
+URI.AllowedSchemes\r
+TYPE: lookup\r
+--DEFAULT--\r
+array (\r
+ 'http' => true,\r
+ 'https' => true,\r
+ 'mailto' => true,\r
+ 'ftp' => true,\r
+ 'nntp' => true,\r
+ 'news' => true,\r
+)\r
+--DESCRIPTION--\r
+Whitelist that defines the schemes that a URI is allowed to have. This\r
+prevents XSS attacks from using pseudo-schemes like javascript or mocha.\r
--- /dev/null
+URI.Base\r
+TYPE: string/null\r
+VERSION: 2.1.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ The base URI is the URI of the document this purified HTML will be\r
+ inserted into. This information is important if HTML Purifier needs\r
+ to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute\r
+ is on. You may use a non-absolute URI for this value, but behavior\r
+ may vary (%URI.MakeAbsolute deals nicely with both absolute and \r
+ relative paths, but forwards-compatibility is not guaranteed).\r
+ <strong>Warning:</strong> If set, the scheme on this URI\r
+ overrides the one specified by %URI.DefaultScheme. \r
+</p>\r
+\r
--- /dev/null
+URI.DefaultScheme\r
+TYPE: string\r
+DEFAULT: 'http'\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Defines through what scheme the output will be served, in order to \r
+ select the proper object validator when no scheme information is present.\r
+</p>\r
+\r
--- /dev/null
+URI.DefinitionID\r
+TYPE: string/null\r
+VERSION: 2.1.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Unique identifier for a custom-built URI definition. If you want\r
+ to add custom URIFilters, you must specify this value.\r
+</p>\r
+\r
--- /dev/null
+URI.DefinitionRev\r
+TYPE: int\r
+VERSION: 2.1.0\r
+DEFAULT: 1\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Revision identifier for your custom definition. See\r
+ %HTML.DefinitionRev for details. \r
+</p>\r
+\r
--- /dev/null
+URI.Disable\r
+TYPE: bool\r
+VERSION: 1.3.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Disables all URIs in all forms. Not sure why you'd want to do that \r
+ (after all, the Internet's founded on the notion of a hyperlink). \r
+</p>\r
+\r
+--ALIASES--\r
+Attr.DisableURI\r
--- /dev/null
+URI.DisableExternal\r
+TYPE: bool\r
+VERSION: 1.2.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+Disables links to external websites. This is a highly effective anti-spam\r
+and anti-pagerank-leech measure, but comes at a hefty price: no links or\r
+images outside of your domain will be allowed. Non-linkified URIs will\r
+still be preserved. If you want to be able to link to subdomains or use\r
+absolute URIs, specify %URI.Host for your website. \r
--- /dev/null
+URI.DisableExternalResources\r
+TYPE: bool\r
+VERSION: 1.3.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+Disables the embedding of external resources, preventing users from\r
+embedding things like images from other hosts. This prevents access\r
+tracking (good for email viewers), bandwidth leeching, cross-site request\r
+forging, goatse.cx posting, and other nasties, but also results in a loss\r
+of end-user functionality (they can't directly post a pic they posted from\r
+Flickr anymore). Use it if you don't have a robust user-content moderation\r
+team. \r
--- /dev/null
+URI.DisableResources\r
+TYPE: bool\r
+VERSION: 1.3.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Disables embedding resources, essentially meaning no pictures. You can\r
+ still link to them though. See %URI.DisableExternalResources for why \r
+ this might be a good idea. \r
+</p>\r
+\r
--- /dev/null
+URI.Host\r
+TYPE: string/null\r
+VERSION: 1.2.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Defines the domain name of the server, so we can determine whether or \r
+ not an absolute URI is from your website. Not strictly necessary, \r
+ as users should be using relative URIs to reference resources on your \r
+ website. It will, however, let you use absolute URIs to link to \r
+ subdomains of the domain you post here: i.e. example.com will allow \r
+ sub.example.com. However, higher up domains will still be excluded: \r
+ if you set %URI.Host to sub.example.com, example.com will be blocked. \r
+ <strong>Note:</strong> This directive overrides %URI.Base because\r
+ a given page may be on a sub-domain, but you wish HTML Purifier to be\r
+ more relaxed and allow some of the parent domains too.\r
+</p>\r
+\r
--- /dev/null
+URI.HostBlacklist\r
+TYPE: list\r
+VERSION: 1.3.0\r
+DEFAULT: array()\r
+--DESCRIPTION--\r
+List of strings that are forbidden in the host of any URI. Use it to kill\r
+domain names of spam, etc. Note that it will catch anything in the domain,\r
+so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. \r
--- /dev/null
+URI.MakeAbsolute\r
+TYPE: bool\r
+VERSION: 2.1.0\r
+DEFAULT: false\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Converts all URIs into absolute forms. This is useful when the HTML\r
+ being filtered assumes a specific base path, but will actually be\r
+ viewed in a different context (and setting an alternate base URI is\r
+ not possible). %URI.Base must be set for this directive to work.\r
+</p>\r
--- /dev/null
+URI.Munge\r
+TYPE: string/null\r
+VERSION: 1.3.0\r
+DEFAULT: NULL\r
+--DESCRIPTION--\r
+\r
+<p>\r
+ Munges all browsable (usually http, https and ftp)\r
+ absolute URIs into another URI, usually a URI redirection service.\r
+ This directive accepts a URI, formatted with a <code>%s</code> where \r
+ the url-encoded original URI should be inserted (sample: \r
+ <code>http://www.google.com/url?q=%s</code>).\r
+</p>\r
+<p>\r
+ Uses for this directive:\r
+</p>\r
+<ul>\r
+ <li>\r
+ Prevent PageRank leaks, while being fairly transparent \r
+ to users (you may also want to add some client side JavaScript to \r
+ override the text in the statusbar). <strong>Notice</strong>:\r
+ Many security experts believe that this form of protection does\r
+not deter spam-bots. \r
+ </li>\r
+ <li>\r
+ Redirect users to a splash page telling them they are leaving your\r
+ website. While this is poor usability practice, it is often\r
+mandated\r
+ in corporate environments.\r
+ </li>\r
+</ul>\r
--- /dev/null
+URI.OverrideAllowedSchemes\r
+TYPE: bool\r
+DEFAULT: true\r
+--DESCRIPTION--\r
+If this is set to true (which it is by default), you can override\r
+%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme to the\r
+registry. If false, you will also have to update that directive in order\r
+to add more schemes.\r
--- /dev/null
+URI\r
+DESCRIPTION: Features regarding Uniform Resource Identifiers.\r
--- /dev/null
+name = "HTML Purifier"
<?php
-// common defs that we'll support by default
-require_once 'HTMLPurifier/ChildDef.php';
-require_once 'HTMLPurifier/ChildDef/Empty.php';
-require_once 'HTMLPurifier/ChildDef/Required.php';
-require_once 'HTMLPurifier/ChildDef/Optional.php';
-require_once 'HTMLPurifier/ChildDef/Custom.php';
-
-// NOT UNIT TESTED!!!
-
+/**
+ * @todo Unit test
+ */
class HTMLPurifier_ContentSets
{
/**
* List of content set strings (pipe separators) indexed by name.
- * @public
*/
- var $info = array();
+ public $info = array();
/**
* List of content set lookups (element => true) indexed by name.
* @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
- * @public
*/
- var $lookup = array();
+ public $lookup = array();
/**
* Synchronized list of defined content sets (keys of info)
*/
- var $keys = array();
+ protected $keys = array();
/**
* Synchronized list of defined content values (values of info)
*/
- var $values = array();
+ protected $values = array();
/**
* Merges in module's content sets, expands identifiers in the content
* sets and populates the keys, values and lookup member variables.
* @param $modules List of HTMLPurifier_HTMLModule
*/
- function HTMLPurifier_ContentSets($modules) {
+ public function __construct($modules) {
if (!is_array($modules)) $modules = array($modules);
// populate content_sets based on module hints
// sorry, no way of overloading
* @param $def HTMLPurifier_ElementDef reference
* @param $module Module that defined the ElementDef
*/
- function generateChildDef(&$def, $module) {
+ public function generateChildDef(&$def, $module) {
if (!empty($def->child)) return; // already done!
$content_model = $def->content_model;
if (is_string($content_model)) {
* @param $def HTMLPurifier_ElementDef to have ChildDef extracted
* @return HTMLPurifier_ChildDef corresponding to ElementDef
*/
- function getChildDef($def, $module) {
+ public function getChildDef($def, $module) {
$value = $def->content_model;
if (is_object($value)) {
trigger_error(
* @param $string List of elements
* @return Lookup array of elements
*/
- function convertToLookup($string) {
+ protected function convertToLookup($string) {
$array = explode('|', str_replace(' ', '', $string));
$ret = array();
foreach ($array as $i => $k) {
* Registry object that contains information about the current context.
* @warning Is a bit buggy when variables are set to null: it thinks
* they don't exist! So use false instead, please.
+ * @note Since the variables Context deals with may not be objects,
+ * references are very important here! Do not remove!
*/
class HTMLPurifier_Context
{
/**
* Private array that stores the references.
- * @private
*/
- var $_storage = array();
+ private $_storage = array();
/**
* Registers a variable into the context.
* @param $name String name
- * @param $ref Variable to be registered
+ * @param $ref Reference to variable to be registered
*/
- function register($name, &$ref) {
+ public function register($name, &$ref) {
if (isset($this->_storage[$name])) {
trigger_error("Name $name produces collision, cannot re-register",
E_USER_ERROR);
* @param $name String name
* @param $ignore_error Boolean whether or not to ignore error
*/
- function &get($name, $ignore_error = false) {
+ public function &get($name, $ignore_error = false) {
if (!isset($this->_storage[$name])) {
if (!$ignore_error) {
trigger_error("Attempted to retrieve non-existent variable $name",
* Destroys a variable in the context.
* @param $name String name
*/
- function destroy($name) {
+ public function destroy($name) {
if (!isset($this->_storage[$name])) {
trigger_error("Attempted to destroy non-existent variable $name",
E_USER_ERROR);
* Checks whether or not the variable exists.
* @param $name String name
*/
- function exists($name) {
+ public function exists($name) {
return isset($this->_storage[$name]);
}
* Loads a series of variables from an associative array
* @param $context_array Assoc array of variables to load
*/
- function loadArray(&$context_array) {
+ public function loadArray($context_array) {
foreach ($context_array as $key => $discard) {
$this->register($key, $context_array[$key]);
}
* Super-class for definition datatype objects, implements serialization
* functions for the class.
*/
-class HTMLPurifier_Definition
+abstract class HTMLPurifier_Definition
{
/**
* Has setup() been called yet?
*/
- var $setup = false;
+ public $setup = false;
/**
* What type of definition is it?
*/
- var $type;
+ public $type;
/**
* Sets up the definition object into the final form, something
* not done by the constructor
* @param $config HTMLPurifier_Config instance
*/
- function doSetup($config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract protected function doSetup($config);
/**
* Setup function that aborts if already setup
* @param $config HTMLPurifier_Config instance
*/
- function setup($config) {
+ public function setup($config) {
if ($this->setup) return;
$this->setup = true;
$this->doSetup($config);
<?php
-require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
-require_once 'HTMLPurifier/DefinitionCache/Null.php';
-
-require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
-require_once 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
-require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
-
/**
* Abstract class representing Definition cache managers that implements
* useful common methods and is a factory.
- * @todo Get some sort of versioning variable so the library can easily
- * invalidate the cache with a new version
- * @todo Make the test runner cache aware and allow the user to easily
- * flush the cache
* @todo Create a separate maintenance file advanced users can use to
* cache their custom HTMLDefinition, which can be loaded
* via a configuration directive
* @todo Implement memcached
*/
-class HTMLPurifier_DefinitionCache
+abstract class HTMLPurifier_DefinitionCache
{
- var $type;
+ public $type;
/**
* @param $type Type of definition objects this instance of the
* cache will handle.
*/
- function HTMLPurifier_DefinitionCache($type) {
+ public function __construct($type) {
$this->type = $type;
}
* Generates a unique identifier for a particular configuration
* @param $config Instance of HTMLPurifier_Config
*/
- function generateKey($config) {
- return $config->version . '-' . // possibly replace with function calls
- $config->getBatchSerial($this->type) . '-' .
+ public function generateKey($config) {
+ return $config->version . ',' . // possibly replace with function calls
+ $config->getBatchSerial($this->type) . ',' .
$config->get($this->type, 'DefinitionRev');
}
* @param $key Key to test
* @param $config Instance of HTMLPurifier_Config to test against
*/
- function isOld($key, $config) {
- if (substr_count($key, '-') < 2) return true;
- list($version, $hash, $revision) = explode('-', $key, 3);
+ public function isOld($key, $config) {
+ if (substr_count($key, ',') < 2) return true;
+ list($version, $hash, $revision) = explode(',', $key, 3);
$compare = version_compare($version, $config->version);
// version mismatch, is always old
if ($compare != 0) return true;
* @param $def Definition object to check
* @return Boolean true if good, false if not
*/
- function checkDefType($def) {
+ public function checkDefType($def) {
if ($def->type !== $this->type) {
trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
return false;
/**
* Adds a definition object to the cache
*/
- function add($def, $config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function add($def, $config);
/**
* Unconditionally saves a definition object to the cache
*/
- function set($def, $config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function set($def, $config);
/**
* Replace an object in the cache
*/
- function replace($def, $config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function replace($def, $config);
/**
* Retrieves a definition object from the cache
*/
- function get($config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function get($config);
/**
* Removes a definition object from the cache
*/
- function remove($config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function remove($config);
/**
* Clears all objects from cache
*/
- function flush($config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function flush($config);
/**
* Clears all expired (older version or revision) objects from cache
* not interfere with other Definition types, and cleanup()
* should not be repeatedly called by userland code.
*/
- function cleanup($config) {
- trigger_error('Cannot call abstract method', E_USER_ERROR);
- }
+ abstract public function cleanup($config);
+
}
<?php
-require_once 'HTMLPurifier/DefinitionCache.php';
-
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
{
/**
* Cache object we are decorating
*/
- var $cache;
+ public $cache;
- function HTMLPurifier_DefinitionCache_Decorator() {}
+ public function __construct() {}
/**
* Lazy decorator function
* @param $cache Reference to cache object to decorate
*/
- function decorate(&$cache) {
+ public function decorate(&$cache) {
$decorator = $this->copy();
// reference is necessary for mocks in PHP 4
$decorator->cache =& $cache;
/**
* Cross-compatible clone substitute
*/
- function copy() {
+ public function copy() {
return new HTMLPurifier_DefinitionCache_Decorator();
}
- function add($def, $config) {
+ public function add($def, $config) {
return $this->cache->add($def, $config);
}
- function set($def, $config) {
+ public function set($def, $config) {
return $this->cache->set($def, $config);
}
- function replace($def, $config) {
+ public function replace($def, $config) {
return $this->cache->replace($def, $config);
}
- function get($config) {
+ public function get($config) {
return $this->cache->get($config);
}
- function flush($config) {
+ public function remove($config) {
+ return $this->cache->remove($config);
+ }
+
+ public function flush($config) {
return $this->cache->flush($config);
}
- function cleanup($config) {
+ public function cleanup($config) {
return $this->cache->cleanup($config);
}
<?php
-require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
-
/**
* Definition cache decorator class that cleans up the cache
* whenever there is a cache miss.
HTMLPurifier_DefinitionCache_Decorator
{
- var $name = 'Cleanup';
+ public $name = 'Cleanup';
- function copy() {
+ public function copy() {
return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
}
- function add($def, $config) {
+ public function add($def, $config) {
$status = parent::add($def, $config);
if (!$status) parent::cleanup($config);
return $status;
}
- function set($def, $config) {
+ public function set($def, $config) {
$status = parent::set($def, $config);
if (!$status) parent::cleanup($config);
return $status;
}
- function replace($def, $config) {
+ public function replace($def, $config) {
$status = parent::replace($def, $config);
if (!$status) parent::cleanup($config);
return $status;
}
- function get($config) {
+ public function get($config) {
$ret = parent::get($config);
if (!$ret) parent::cleanup($config);
return $ret;
<?php
-require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
-
/**
* Definition cache decorator class that saves all cache retrievals
* to PHP's memory; good for unit tests or circumstances where
HTMLPurifier_DefinitionCache_Decorator
{
- var $definitions;
- var $name = 'Memory';
+ protected $definitions;
+ public $name = 'Memory';
- function copy() {
+ public function copy() {
return new HTMLPurifier_DefinitionCache_Decorator_Memory();
}
- function add($def, $config) {
+ public function add($def, $config) {
$status = parent::add($def, $config);
if ($status) $this->definitions[$this->generateKey($config)] = $def;
return $status;
}
- function set($def, $config) {
+ public function set($def, $config) {
$status = parent::set($def, $config);
if ($status) $this->definitions[$this->generateKey($config)] = $def;
return $status;
}
- function replace($def, $config) {
+ public function replace($def, $config) {
$status = parent::replace($def, $config);
if ($status) $this->definitions[$this->generateKey($config)] = $def;
return $status;
}
- function get($config) {
+ public function get($config) {
$key = $this->generateKey($config);
if (isset($this->definitions[$key])) return $this->definitions[$key];
$this->definitions[$key] = parent::get($config);
<?php
-require_once 'HTMLPurifier/DefinitionCache.php';
-
/**
* Null cache object to use when no caching is on.
*/
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
{
- function add($def, $config) {
+ public function add($def, $config) {
+ return false;
+ }
+
+ public function set($def, $config) {
return false;
}
- function set($def, $config) {
+ public function replace($def, $config) {
return false;
}
- function replace($def, $config) {
+ public function remove($config) {
return false;
}
- function get($config) {
+ public function get($config) {
return false;
}
- function flush($config) {
+ public function flush($config) {
return false;
}
- function cleanup($config) {
+ public function cleanup($config) {
return false;
}
<?php
-require_once 'HTMLPurifier/DefinitionCache.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Cache', 'SerializerPath', null, 'string/null', '
-<p>
- Absolute path with no trailing slash to store serialized definitions in.
- Default is within the
- HTML Purifier library inside DefinitionCache/Serializer. This
- path must be writable by the webserver. This directive has been
- available since 2.0.0.
-</p>
-');
-
class HTMLPurifier_DefinitionCache_Serializer extends
HTMLPurifier_DefinitionCache
{
- function add($def, $config) {
+ public function add($def, $config) {
if (!$this->checkDefType($def)) return;
$file = $this->generateFilePath($config);
if (file_exists($file)) return false;
return $this->_write($file, serialize($def));
}
- function set($def, $config) {
+ public function set($def, $config) {
if (!$this->checkDefType($def)) return;
$file = $this->generateFilePath($config);
if (!$this->_prepareDir($config)) return false;
return $this->_write($file, serialize($def));
}
- function replace($def, $config) {
+ public function replace($def, $config) {
if (!$this->checkDefType($def)) return;
$file = $this->generateFilePath($config);
if (!file_exists($file)) return false;
return $this->_write($file, serialize($def));
}
- function get($config) {
+ public function get($config) {
$file = $this->generateFilePath($config);
if (!file_exists($file)) return false;
return unserialize(file_get_contents($file));
}
- function remove($config) {
+ public function remove($config) {
$file = $this->generateFilePath($config);
if (!file_exists($file)) return false;
return unlink($file);
}
- function flush($config) {
+ public function flush($config) {
if (!$this->_prepareDir($config)) return false;
$dir = $this->generateDirectoryPath($config);
$dh = opendir($dir);
}
}
- function cleanup($config) {
+ public function cleanup($config) {
if (!$this->_prepareDir($config)) return false;
$dir = $this->generateDirectoryPath($config);
$dh = opendir($dir);
/**
* Generates the file path to the serial file corresponding to
* the configuration and definition name
+ * @todo Make protected
*/
- function generateFilePath($config) {
+ public function generateFilePath($config) {
$key = $this->generateKey($config);
return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
}
/**
* Generates the path to the directory containing this cache's serial files
* @note No trailing slash
+ * @todo Make protected
*/
- function generateDirectoryPath($config) {
+ public function generateDirectoryPath($config) {
$base = $this->generateBaseDirectoryPath($config);
return $base . '/' . $this->type;
}
/**
* Generates path to base directory that contains all definition type
* serials
+ * @todo Make protected
*/
- function generateBaseDirectoryPath($config) {
+ public function generateBaseDirectoryPath($config) {
$base = $config->get('Cache', 'SerializerPath');
$base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
return $base;
* @param $data Data to write into file
* @return Number of bytes written if success, or false if failure.
*/
- function _write($file, $data) {
- static $file_put_contents;
- if ($file_put_contents === null) {
- $file_put_contents = function_exists('file_put_contents');
- }
- if ($file_put_contents) {
- return file_put_contents($file, $data);
- }
- $fh = fopen($file, 'w');
- if (!$fh) return false;
- $status = fwrite($fh, $data);
- fclose($fh);
- return $status;
+ private function _write($file, $data) {
+ return file_put_contents($file, $data);
}
/**
* Prepares the directory that this type stores the serials in
* @return True if successful
*/
- function _prepareDir($config) {
+ private function _prepareDir($config) {
$directory = $this->generateDirectoryPath($config);
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
} elseif (!$this->_testPermissions($base)) {
return false;
}
+ $old = umask(0022); // disable group and world writes
mkdir($directory);
+ umask($old);
} elseif (!$this->_testPermissions($directory)) {
return false;
}
* Tests permissions on a directory and throws out friendly
* error messages and attempts to chmod it itself if possible
*/
- function _testPermissions($dir) {
+ private function _testPermissions($dir) {
// early abort, if it is writable, everything is hunky-dory
if (is_writable($dir)) return true;
if (!is_dir($dir)) {
<?php
-require_once 'HTMLPurifier/DefinitionCache.php';
-require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
-This directive defines which method to use when caching definitions,
-the complex data-type that makes HTML Purifier tick. Set to null
-to disable caching (not recommended, as you will see a definite
-performance degradation). This directive has been available since 2.0.0.
-');
-
-HTMLPurifier_ConfigSchema::defineAlias(
- 'Core', 'DefinitionCache',
- 'Cache', 'DefinitionImpl'
-);
-
-
/**
* Responsible for creating definition caches.
*/
class HTMLPurifier_DefinitionCacheFactory
{
- var $caches = array('Serializer' => array());
- var $implementations = array();
- var $decorators = array();
+ protected $caches = array('Serializer' => array());
+ protected $implementations = array();
+ protected $decorators = array();
/**
* Initialize default decorators
*/
- function setup() {
+ public function setup() {
$this->addDecorator('Cleanup');
}
/**
* Retrieves an instance of global definition cache factory.
- * @static
*/
- function &instance($prototype = null) {
+ public static function instance($prototype = null) {
static $instance;
if ($prototype !== null) {
$instance = $prototype;
* @param $short Short name of cache object, for reference
* @param $long Full class name of cache object, for construction
*/
- function register($short, $long) {
+ public function register($short, $long) {
$this->implementations[$short] = $long;
}
* @param $name Name of definitions handled by cache
* @param $config Instance of HTMLPurifier_Config
*/
- function &create($type, $config) {
+ public function create($type, $config) {
$method = $config->get('Cache', 'DefinitionImpl');
if ($method === null) {
- $null = new HTMLPurifier_DefinitionCache_Null($type);
- return $null;
+ return new HTMLPurifier_DefinitionCache_Null($type);
}
if (!empty($this->caches[$method][$type])) {
return $this->caches[$method][$type];
}
if (
isset($this->implementations[$method]) &&
- class_exists($class = $this->implementations[$method])
+ class_exists($class = $this->implementations[$method], false)
) {
$cache = new $class($type);
} else {
* Registers a decorator to add to all new cache objects
* @param
*/
- function addDecorator($decorator) {
+ public function addDecorator($decorator) {
if (is_string($decorator)) {
$class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
$decorator = new $class;
/**
* Full name of doctype
*/
- var $name;
+ public $name;
/**
* List of standard modules (string identifiers or literal objects)
* that this doctype uses
*/
- var $modules = array();
+ public $modules = array();
/**
* List of modules to use for tidying up code
*/
- var $tidyModules = array();
+ public $tidyModules = array();
/**
* Is the language derived from XML (i.e. XHTML)?
*/
- var $xml = true;
+ public $xml = true;
/**
* List of aliases for this doctype
*/
- var $aliases = array();
+ public $aliases = array();
/**
* Public DTD identifier
*/
- var $dtdPublic;
+ public $dtdPublic;
/**
* System DTD identifier
*/
- var $dtdSystem;
+ public $dtdSystem;
- function HTMLPurifier_Doctype($name = null, $xml = true, $modules = array(),
+ public function __construct($name = null, $xml = true, $modules = array(),
$tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
) {
$this->name = $name;
$this->dtdPublic = $dtd_public;
$this->dtdSystem = $dtd_system;
}
-
- /**
- * Clones the doctype, use before resolving modes and the like
- */
- function copy() {
- return unserialize(serialize($this));
- }
}
<?php
-require_once 'HTMLPurifier/Doctype.php';
-
-// Legacy directives for doctype specification
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Strict', false, 'bool',
- 'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
- 'This directive is deprecated in favor of %HTML.Doctype. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'XHTML', true, 'bool',
- 'Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. '.
- 'This directive is deprecated in favor of %HTML.Doctype. '.
- 'This directive was available since 1.1.'
-);
-HTMLPurifier_ConfigSchema::defineAlias('Core', 'XHTML', 'HTML', 'XHTML');
-
class HTMLPurifier_DoctypeRegistry
{
/**
* Hash of doctype names to doctype objects
- * @protected
*/
- var $doctypes;
+ protected $doctypes;
/**
* Lookup table of aliases to real doctype names
- * @protected
*/
- var $aliases;
+ protected $aliases;
/**
* Registers a doctype to the registry
* @param $modules Modules doctype will load
* @param $modules_for_modes Modules doctype will load for certain modes
* @param $aliases Alias names for doctype
- * @return Reference to registered doctype (usable for further editing)
+ * @return Editable registered doctype
*/
- function ®ister($doctype, $xml = true, $modules = array(),
+ public function register($doctype, $xml = true, $modules = array(),
$tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
) {
if (!is_array($modules)) $modules = array($modules);
$doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
);
}
- $this->doctypes[$doctype->name] =& $doctype;
+ $this->doctypes[$doctype->name] = $doctype;
$name = $doctype->name;
// hookup aliases
foreach ($doctype->aliases as $alias) {
* @note This function resolves aliases
* @note When possible, use the more fully-featured make()
* @param $doctype Name of doctype
- * @return Reference to doctype object
+ * @return Editable doctype object
*/
- function &get($doctype) {
+ public function get($doctype) {
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
if (!isset($this->doctypes[$doctype])) {
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
* Generator whether or not the current document is XML
* based or not).
*/
- function make($config) {
- $original_doctype = $this->get($this->getDoctypeFromConfig($config));
- $doctype = $original_doctype->copy();
- return $doctype;
+ public function make($config) {
+ return clone $this->get($this->getDoctypeFromConfig($config));
}
/**
* Retrieves the doctype from the configuration object
*/
- function getDoctypeFromConfig($config) {
+ public function getDoctypeFromConfig($config) {
// recommended test
$doctype = $config->get('HTML', 'Doctype');
if (!empty($doctype)) return $doctype;
* Does the definition work by itself, or is it created solely
* for the purpose of merging into another definition?
*/
- var $standalone = true;
+ public $standalone = true;
/**
* Associative array of attribute name to HTMLPurifier_AttrDef
* contain string indentifiers in lieu of HTMLPurifier_AttrDef,
* see HTMLPurifier_AttrTypes on how they are expanded during
* HTMLPurifier_HTMLDefinition->setup() processing.
- * @public
*/
- var $attr = array();
+ public $attr = array();
/**
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
- * @public
*/
- var $attr_transform_pre = array();
+ public $attr_transform_pre = array();
/**
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
- * @public
*/
- var $attr_transform_post = array();
-
-
+ public $attr_transform_post = array();
/**
* HTMLPurifier_ChildDef of this tag.
- * @public
*/
- var $child;
+ public $child;
/**
* Abstract string representation of internal ChildDef rules. See
* into an HTMLPurifier_ChildDef.
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
- * @public
*/
- var $content_model;
+ public $content_model;
/**
* Value of $child->type, used to determine which ChildDef to use,
* @warning This must be lowercase
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
- * @public
*/
- var $content_model_type;
+ public $content_model_type;
* is important for chameleon ins and del processing in
* HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
* have to worry about this one.
- * @public
*/
- var $descendants_are_inline = false;
+ public $descendants_are_inline = false;
/**
* List of the names of required attributes this element has. Dynamically
* populated by HTMLPurifier_HTMLDefinition::getElement
- * @public
*/
- var $required_attr = array();
+ public $required_attr = array();
/**
* Lookup table of tags excluded from all descendants of this tag.
* all descendants and not just children. Note that the XHTML
* Modularization Abstract Modules are blithely unaware of such
* distinctions.
- * @public
- */
- var $excludes = array();
-
- /**
- * Is this element safe for untrusted users to use?
*/
- var $safe;
+ public $excludes = array();
/**
* Low-level factory constructor for creating new standalone element defs
- * @static
*/
- function create($safe, $content_model, $content_model_type, $attr) {
+ public static function create($content_model, $content_model_type, $attr) {
$def = new HTMLPurifier_ElementDef();
- $def->safe = (bool) $safe;
$def->content_model = $content_model;
$def->content_model_type = $content_model_type;
$def->attr = $attr;
* Values from the new element def take precedence if a value is
* not mergeable.
*/
- function mergeIn($def) {
+ public function mergeIn($def) {
// later keys takes precedence
foreach($def->attr as $k => $v) {
}
if(!is_null($def->child)) $this->child = $def->child;
if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
- if(!is_null($def->safe)) $this->safe = $def->safe;
}
* @param $a1 Array by reference that is merged into
* @param $a2 Array that merges into $a1
*/
- function _mergeAssocArray(&$a1, $a2) {
+ private function _mergeAssocArray(&$a1, $a2) {
foreach ($a2 as $k => $v) {
if ($v === false) {
if (isset($a1[$k])) unset($a1[$k]);
}
}
- /**
- * Retrieves a copy of the element definition
- */
- function copy() {
- return unserialize(serialize($this));
- }
-
}
<?php
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'Encoding', 'utf-8', 'istring',
- 'If for some reason you are unable to convert all webpages to UTF-8, '.
- 'you can use this directive as a stop-gap compatibility change to '.
- 'let HTML Purifier deal with non UTF-8 input. This technique has '.
- 'notable deficiencies: absolutely no characters outside of the selected '.
- 'character encoding will be preserved, not even the ones that have '.
- 'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '.
- 'that automatically resolves all entities), making it pretty useless '.
- 'for anything except the most I18N-blind applications, although '.
- '%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
- 'another tradeoff. This directive '.
- 'only accepts ISO-8859-1 if iconv is not enabled.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'EscapeNonASCIICharacters', false, 'bool',
- 'This directive overcomes a deficiency in %Core.Encoding by blindly '.
- 'converting all non-ASCII characters into decimal numeric entities before '.
- 'converting it to its native encoding. This means that even '.
- 'characters that can be expressed in the non-UTF-8 encoding will '.
- 'be entity-ized, which can be a real downer for encodings like Big5. '.
- 'It also assumes that the ASCII repetoire is available, although '.
- 'this is the case for almost all encodings. Anyway, use UTF-8! This '.
- 'directive has been available since 1.4.0.'
-);
-
-if ( !function_exists('iconv') ) {
- // only encodings with native PHP support
- HTMLPurifier_ConfigSchema::defineAllowedValues(
- 'Core', 'Encoding', array(
- 'utf-8',
- 'iso-8859-1'
- )
- );
- HTMLPurifier_ConfigSchema::defineValueAliases(
- 'Core', 'Encoding', array(
- 'iso8859-1' => 'iso-8859-1'
- )
- );
-}
-
-HTMLPurifier_ConfigSchema::define(
- 'Test', 'ForceNoIconv', false, 'bool',
- 'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
- 'exist and use only pure PHP implementations.'
-);
-
/**
* A UTF-8 specific character encoder that handles cleaning and transforming.
* @note All functions in this class should be static.
/**
* Constructor throws fatal error if you attempt to instantiate class
*/
- function HTMLPurifier_Encoder() {
+ private function __construct() {
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
}
/**
* Error-handler that mutes errors, alternative to shut-up operator.
*/
- function muteErrorHandler() {}
+ private static function muteErrorHandler() {}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
* It will parse according to UTF-8 and return a valid UTF8 string, with
* non-SGML codepoints excluded.
*
- * @static
* @note Just for reference, the non-SGML code points are 0 to 31 and
* 127 to 159, inclusive. However, we allow code points 9, 10
* and 13, which are the tab, line feed and carriage return
* would need that, and I'm probably not going to implement them.
* Once again, PHP 6 should solve all our problems.
*/
- function cleanUTF8($str, $force_php = false) {
+ public static function cleanUTF8($str, $force_php = false) {
static $non_sgml_chars = array();
if (empty($non_sgml_chars)) {
/**
* Translates a Unicode codepoint into its corresponding UTF-8 character.
- * @static
* @note Based on Feyd's function at
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
* which is in public domain.
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+
- function unichr($code) {
+ public static function unichr($code) {
if($code > 1114111 or $code < 0 or
($code >= 55296 and $code <= 57343) ) {
// bits are set outside the "valid" range as defined
/**
* Converts a string to UTF-8 based on configuration.
- * @static
*/
- function convertToUTF8($str, $config, &$context) {
+ public static function convertToUTF8($str, $config, $context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
if ($encoding === 'utf-8') return $str;
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
- return @iconv($encoding, 'utf-8//IGNORE', $str);
+ set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+ $str = iconv($encoding, 'utf-8//IGNORE', $str);
+ restore_error_handler();
+ return $str;
} elseif ($encoding === 'iso-8859-1') {
- return @utf8_encode($str);
+ set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+ $str = utf8_encode($str);
+ restore_error_handler();
+ return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
/**
* Converts a string from UTF-8 based on configuration.
- * @static
* @note Currently, this is a lossy conversion, with unexpressable
* characters being omitted.
*/
- function convertFromUTF8($str, $config, &$context) {
+ public static function convertFromUTF8($str, $config, $context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
- return @iconv('utf-8', $encoding . '//IGNORE', $str);
+ set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+ $str = iconv('utf-8', $encoding . '//IGNORE', $str);
+ restore_error_handler();
+ return $str;
} elseif ($encoding === 'iso-8859-1') {
- return @utf8_decode($str);
+ set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+ $str = utf8_decode($str);
+ restore_error_handler();
+ return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
/**
* Lossless (character-wise) conversion of HTML to ASCII
- * @static
* @param $str UTF-8 string to be converted to ASCII
* @returns ASCII encoded string with non-ASCII character entity-ized
* @warning Adapted from MediaWiki, claiming fair use: this is a common
* @note Sort of with cleanUTF8() but it assumes that $str is
* well-formed UTF-8
*/
- function convertToASCIIDumbLossless($str) {
+ public static function convertToASCIIDumbLossless($str) {
$bytesleft = 0;
$result = '';
$working = 0;
/**
* Assoc array of entity name to character represented.
- * @public
*/
- var $table;
+ public $table;
/**
* Sets up the entity lookup table from the serialized file contents.
* using the maintenance script generate_entity_file.php
* @warning This is not in constructor to help enforce the Singleton
*/
- function setup($file = false) {
+ public function setup($file = false) {
if (!$file) {
$file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
}
/**
* Retrieves sole instance of the object.
- * @static
* @param Optional prototype of custom lookup table to overload with.
*/
- function instance($prototype = false) {
+ public static function instance($prototype = false) {
// no references, since PHP doesn't copy unless modified
static $instance = null;
if ($prototype) {
<?php
-require_once 'HTMLPurifier/EntityLookup.php';
-require_once 'HTMLPurifier/Encoder.php';
-
// if want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error) because it's impossible to pass
// $config or $context to the callback functions.
/**
* Reference to entity lookup table.
- * @protected
*/
- var $_entity_lookup;
+ protected $_entity_lookup;
/**
* Callback regex string for parsing entities.
- * @protected
*/
- var $_substituteEntitiesRegex =
+ protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
// 1. hex 2. dec 3. string (XML style)
/**
* Decimal to parsed string conversion table for special entities.
- * @protected
*/
- var $_special_dec2str =
+ protected $_special_dec2str =
array(
34 => '"',
38 => '&',
/**
* Stripped entity names to decimal conversion table for special entities.
- * @protected
*/
- var $_special_ent2dec =
+ protected $_special_ent2dec =
array(
'quot' => 34,
'amp' => 38,
* running this whenever you have parsed character is t3h 5uck, we run
* it before everything else.
*
- * @protected
* @param $string String to have non-special entities parsed.
* @returns Parsed string.
*/
- function substituteNonSpecialEntities($string) {
+ public function substituteNonSpecialEntities($string) {
// it will try to detect missing semicolons, but don't rely on it
return preg_replace_callback(
$this->_substituteEntitiesRegex,
/**
* Callback function for substituteNonSpecialEntities() that does the work.
*
- * @warning Though this is public in order to let the callback happen,
- * calling it directly is not recommended.
* @param $matches PCRE matches array, with 0 the entire match, and
* either index 1, 2 or 3 set with a hex value, dec value,
* or string (respectively).
* @returns Replacement string.
*/
- function nonSpecialEntityCallback($matches) {
+ protected function nonSpecialEntityCallback($matches) {
// replaces all but big five
$entity = $matches[0];
$is_num = (@$matches[0][1] === '#');
* @notice We try to avoid calling this function because otherwise, it
* would have to be called a lot (for every parsed section).
*
- * @protected
* @param $string String to have non-special entities parsed.
* @returns Parsed string.
*/
- function substituteSpecialEntities($string) {
+ public function substituteSpecialEntities($string) {
return preg_replace_callback(
$this->_substituteEntitiesRegex,
array($this, 'specialEntityCallback'),
*
* This callback has same syntax as nonSpecialEntityCallback().
*
- * @warning Though this is public in order to let the callback happen,
- * calling it directly is not recommended.
* @param $matches PCRE-style matches array, with 0 the entire match, and
* either index 1, 2 or 3 set with a hex value, dec value,
* or string (respectively).
* @returns Replacement string.
*/
- function specialEntityCallback($matches) {
+ protected function specialEntityCallback($matches) {
$entity = $matches[0];
$is_num = (@$matches[0][1] === '#');
if ($is_num) {
+++ /dev/null
-<?php
-
-/**
- * Return object from functions that signifies error when null doesn't cut it
- */
-class HTMLPurifier_Error {}
-
<?php
-require_once 'HTMLPurifier/Generator.php';
-
/**
* Error collection class that enables HTML Purifier to report HTML
* problems back to the user
class HTMLPurifier_ErrorCollector
{
- var $errors = array();
- var $locale;
- var $generator;
- var $context;
+ protected $errors = array();
+ protected $locale;
+ protected $generator;
+ protected $context;
- function HTMLPurifier_ErrorCollector(&$context) {
- $this->locale =& $context->get('Locale');
+ public function __construct($context) {
+ $this->locale =& $context->get('Locale');
$this->generator =& $context->get('Generator');
- $this->context =& $context;
+ $this->context = $context;
}
/**
* @param $severity int Error severity, PHP error style (don't use E_USER_)
* @param $msg string Error message text
*/
- function send($severity, $msg) {
+ public function send($severity, $msg) {
$args = array();
if (func_num_args() > 2) {
* @param List of arrays in format of array(Error message text,
* token that caused error, tokens surrounding token)
*/
- function getRaw() {
+ public function getRaw() {
return $this->errors;
}
* Default HTML formatting implementation for error messages
* @param $config Configuration array, vital for HTML output nature
*/
- function getHTMLFormatted($config) {
+ public function getHTMLFormatted($config) {
$ret = array();
$errors = $this->errors;
--- /dev/null
+<?php
+
+/**
+ * Library-wide exception type for HTML Purifier: exceptions thrown by the
+ * library are instances of this class. It adds no members of its own on
+ * top of PHP's built-in Exception.
+ */
+class HTMLPurifier_Exception extends Exception {}
+
* named 1, 2 and 3, the order of execution should go 1->preFilter,
* 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
* 1->postFilter.
+ *
+ * @note Methods are not declared abstract as it is perfectly legitimate
+ * for an implementation not to want anything to happen on a step
*/
class HTMLPurifier_Filter
/**
* Name of the filter for identification purposes
*/
- var $name;
+ public $name;
/**
* Pre-processor function, handles HTML before HTML Purifier
*/
- function preFilter($html, $config, &$context) {}
+ public function preFilter($html, $config, $context) {
+ return $html;
+ }
/**
* Post-processor function, handles HTML after HTML Purifier
*/
- function postFilter($html, $config, &$context) {}
+ public function postFilter($html, $config, $context) {
+ return $html;
+ }
}
--- /dev/null
+<?php
+
+/**
+ * This filter extracts <style> blocks from input HTML, cleans them up
+ * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
+ * so they can be used elsewhere in the document.
+ *
+ * @note
+ * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
+ * sample usage.
+ *
+ * @note
+ * This filter can also be used on stylesheets not included in the
+ * document--something purists would probably prefer. Just directly
+ * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
+ */
+class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
+{
+
+    public $name = 'ExtractStyleBlocks';
+
+    /** Raw contents of the <style> blocks collected by styleCallback() */
+    private $_styleMatches = array();
+
+    /** CSS parser: a csstidy instance unless configuration supplies another */
+    private $_tidy;
+
+    public function __construct() {
+        $this->_tidy = new csstidy();
+    }
+
+    /**
+     * Save the contents of CSS blocks to style matches
+     * @param $matches preg_replace style $matches array
+     * @note Nothing is returned, so preg_replace_callback() substitutes an
+     *       empty string and the matched <style> block is dropped from
+     *       the HTML.
+     */
+    protected function styleCallback($matches) {
+        $this->_styleMatches[] = $matches[1];
+    }
+
+    /**
+     * Removes inline <style> tags from HTML, saves them for later use
+     * @param $html HTML to extract style blocks from
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context; receives the
+     *        extracted blocks under the name 'StyleBlocks'
+     * @return HTML with the matched <style> blocks removed
+     * @todo Extend to indicate non-text/css style blocks
+     */
+    public function preFilter($html, $config, $context) {
+        $tidy = $config->get('FilterParam', 'ExtractStyleBlocksTidyImpl');
+        if ($tidy !== null) $this->_tidy = $tidy;
+        // The pattern is ungreedy (U) so that adjacent blocks are matched
+        // one at a time. The body is (.*) rather than (.+): requiring at
+        // least one character made an empty <style></style> consume its
+        // own closing tag and everything up to the next </style>.
+        $html = preg_replace_callback('#<style(?:\s.*)?>(.*)</style>#isU', array($this, 'styleCallback'), $html);
+        $style_blocks = $this->_styleMatches;
+        $this->_styleMatches = array(); // reset
+        // NOTE(review): the blocks are cleaned only after being registered;
+        // the cleaned CSS reaches the context only if register() takes its
+        // argument by reference -- confirm against HTMLPurifier_Context.
+        $context->register('StyleBlocks', $style_blocks); // $context must not be reused
+        if ($this->_tidy) {
+            foreach ($style_blocks as &$style) {
+                $style = $this->cleanCSS($style, $config, $context);
+            }
+            unset($style); // do not leave a dangling reference to the last block
+        }
+        return $html;
+    }
+
+    /**
+     * Takes CSS (the stuff found in <style>) and cleans it.
+     * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
+     * @param $css     CSS styling to clean
+     * @param $config  Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Cleaned CSS
+     */
+    public function cleanCSS($css, $config, $context) {
+        // prepare scope
+        $scope = $config->get('FilterParam', 'ExtractStyleBlocksScope');
+        if ($scope !== null) {
+            $scopes = array_map('trim', explode(',', $scope));
+        } else {
+            $scopes = array();
+        }
+        // remove comments from CSS
+        $css = trim($css);
+        if (strncmp('<!--', $css, 4) === 0) {
+            $css = substr($css, 4);
+        }
+        if (strlen($css) > 3 && substr($css, -3) == '-->') {
+            $css = substr($css, 0, -3);
+        }
+        $css = trim($css);
+        $this->_tidy->parse($css);
+        $css_definition = $config->getDefinition('CSS');
+        foreach ($this->_tidy->css as $k => $decls) {
+            // $decls are all CSS declarations inside an @ selector
+            $new_decls = array();
+            foreach ($decls as $selector => $style) {
+                $selector = trim($selector);
+                if ($selector === '') continue; // should not happen
+                // selectors starting with '+' are dropped; $selector is
+                // known to be non-empty here, so one check is enough
+                if ($selector[0] === '+') continue;
+                if (!empty($scopes)) {
+                    // prefix every comma-separated selector with every scope
+                    $new_selector = array(); // because multiple ones are possible
+                    $selectors = array_map('trim', explode(',', $selector));
+                    foreach ($scopes as $s1) {
+                        foreach ($selectors as $s2) {
+                            $new_selector[] = "$s1 $s2";
+                        }
+                    }
+                    $selector = implode(', ', $new_selector); // now it's a string
+                }
+                // keep only properties the CSS definition knows about and
+                // whose values pass that property's validator
+                foreach ($style as $name => $value) {
+                    if (!isset($css_definition->info[$name])) {
+                        unset($style[$name]);
+                        continue;
+                    }
+                    $def = $css_definition->info[$name];
+                    $ret = $def->validate($value, $config, $context);
+                    if ($ret === false) unset($style[$name]);
+                    else $style[$name] = $ret;
+                }
+                $new_decls[$selector] = $style;
+            }
+            $this->_tidy->css[$k] = $new_decls;
+        }
+        // remove stuff that shouldn't be used, could be reenabled
+        // after security risks are analyzed
+        $this->_tidy->import = array();
+        $this->_tidy->charset = null;
+        $this->_tidy->namespace = null;
+        $css = $this->_tidy->print->plain();
+        // we are going to escape any special characters <>& to ensure
+        // that no funny business occurs (i.e. </style> in a font-family prop).
+        if ($config->get('FilterParam', 'ExtractStyleBlocksEscaping')) {
+            $css = str_replace(
+                array('<', '>', '&'),
+                array('\3C ', '\3E ', '\26 '),
+                $css
+            );
+        }
+        return $css;
+    }
+
+}
+
<?php
-require_once 'HTMLPurifier/Filter.php';
-
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
{
- var $name = 'YouTube preservation';
+ public $name = 'YouTube';
- function preFilter($html, $config, &$context) {
+ public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
}
- function postFilter($html, $config, &$context) {
+ public function postFilter($html, $config, $context) {
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
$post_replace = '<object width="425" height="350" '.
'data="http://www.youtube.com/v/\1">'.
<?php
-HTMLPurifier_ConfigSchema::define(
- 'Output', 'CommentScriptContents', true, 'bool',
- 'Determines whether or not HTML Purifier should attempt to fix up '.
- 'the contents of script tags for legacy browsers with comments. This '.
- 'directive was available since 2.0.0.'
-);
-HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
-
-// extension constraints could be factored into ConfigSchema
-HTMLPurifier_ConfigSchema::define(
- 'Output', 'TidyFormat', false, 'bool', <<<HTML
-<p>
- Determines whether or not to run Tidy on the final output for pretty
- formatting reasons, such as indentation and wrap.
-</p>
-<p>
- This can greatly improve readability for editors who are hand-editing
- the HTML, but is by no means necessary as HTML Purifier has already
- fixed all major errors the HTML may have had. Tidy is a non-default
- extension, and this directive will silently fail if Tidy is not
- available.
-</p>
-<p>
- If you are looking to make the overall look of your page's source
- better, I recommend running Tidy on the entire page rather than just
- user-content (after all, the indentation relative to the containing
- blocks will be incorrect).
-</p>
-<p>
- This directive was available since 1.1.1.
-</p>
-HTML
-);
-HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
-
-HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
-<p>
- Newline string to format final output with. If left null, HTML Purifier
- will auto-detect the default newline type of the system and use that;
- you can manually override it here. Remember, \r\n is Windows, \r
- is Mac, and \n is Unix. This directive was available since 2.0.1.
-</p>
-');
-
/**
* Generates HTML from tokens.
* @todo Refactor interface so that configuration/context is determined
- * upon instantiation, no need for messy generateFromTokens() calls
+ * upon instantiation, no need for messy generateFromTokens() calls
+ * @todo Make some of the more internal functions protected, and have
+ * unit tests work around that
*/
class HTMLPurifier_Generator
{
/**
- * Bool cache of %HTML.XHTML
- * @private
+ * Whether or not generator should produce XML output
+ */
+ private $_xhtml = true;
+
+ /**
+ * :HACK: Whether or not generator should comment the insides of <script> tags
+ */
+ private $_scriptFix = false;
+
+ /**
+ * Cache of HTMLDefinition during HTML output to determine whether or
+ * not attributes should be minimized.
*/
- var $_xhtml = true;
+ private $_def;
/**
- * Bool cache of %Output.CommentScriptContents
- * @private
+ * Configuration for the generator
*/
- var $_scriptFix = false;
+ protected $config;
/**
- * Cache of HTMLDefinition
- * @private
+ * @param $config Instance of HTMLPurifier_Config
+ * @param $context Instance of HTMLPurifier_Context
*/
- var $_def;
+    public function __construct($config = null, $context = null) {
+        // $context is accepted but not read by this constructor.
+        // No configuration supplied: fall back to the library defaults.
+        if (!$config) {
+            $config = HTMLPurifier_Config::createDefault();
+        }
+        $this->config = $config;
+        // Cache the values consulted while generating output
+        $this->_scriptFix = $this->config->get('Output', 'CommentScriptContents');
+        $definition = $this->config->getHTMLDefinition();
+        $this->_def = $definition;
+        $this->_xhtml = $definition->doctype->xml;
+    }
/**
* Generates HTML from an array of tokens.
* @param $config HTMLPurifier_Config object
* @return Generated HTML
*/
- function generateFromTokens($tokens, $config, &$context) {
- $html = '';
- if (!$config) $config = HTMLPurifier_Config::createDefault();
- $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
-
- $this->_def = $config->getHTMLDefinition();
- $this->_xhtml = $this->_def->doctype->xml;
-
+ public function generateFromTokens($tokens) {
if (!$tokens) return '';
+
+ // Basic algorithm
+ $html = '';
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
if ($this->_scriptFix && $tokens[$i]->name === 'script'
- && $i + 2 < $size && $tokens[$i+2]->type == 'end') {
+ && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
// script special case
// the contents of the script block must be ONE token
- // for this to work
+ // for this to work.
$html .= $this->generateFromToken($tokens[$i++]);
$html .= $this->generateScriptFromToken($tokens[$i++]);
- // We're not going to do this: it wouldn't be valid anyway
- //while ($tokens[$i]->name != 'script') {
- // $html .= $this->generateScriptFromToken($tokens[$i++]);
- //}
}
$html .= $this->generateFromToken($tokens[$i]);
}
- if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
-
- $tidy_options = array(
+
+ // Tidy cleanup
+ if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
+ $tidy = new Tidy;
+ $tidy->parseString($html, array(
'indent'=> true,
'output-xhtml' => $this->_xhtml,
'show-body-only' => true,
'indent-spaces' => 2,
'wrap' => 68,
- );
- if (version_compare(PHP_VERSION, '5', '<')) {
- tidy_set_encoding('utf8');
- foreach ($tidy_options as $key => $value) {
- tidy_setopt($key, $value);
- }
- tidy_parse_string($html);
- tidy_clean_repair();
- $html = tidy_get_output();
- } else {
- $tidy = new Tidy;
- $tidy->parseString($html, $tidy_options, 'utf8');
- $tidy->cleanRepair();
- $html = (string) $tidy;
- }
+ ), 'utf8');
+ $tidy->cleanRepair();
+ $html = (string) $tidy; // explicit cast necessary
}
- // normalize newlines to system
- $nl = $config->get('Output', 'Newline');
+
+ // Normalize newlines to system defined value
+ $nl = $this->config->get('Output', 'Newline');
if ($nl === null) $nl = PHP_EOL;
- $html = str_replace("\n", $nl, $html);
+ if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
return $html;
}
* @param $token HTMLPurifier_Token object.
* @return Generated HTML
*/
- function generateFromToken($token) {
- if (!isset($token->type)) return '';
- if ($token->type == 'start') {
+ public function generateFromToken($token) {
+ if (!$token instanceof HTMLPurifier_Token) {
+ trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
+ return '';
+
+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
- } elseif ($token->type == 'end') {
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>';
- } elseif ($token->type == 'empty') {
+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
- ( $this->_xhtml ? ' /': '' )
+ ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
. '>';
- } elseif ($token->type == 'text') {
- return $this->escape($token->data);
+ } elseif ($token instanceof HTMLPurifier_Token_Text) {
+ return $this->escape($token->data, ENT_NOQUOTES);
+ } elseif ($token instanceof HTMLPurifier_Token_Comment) {
+ return '<!--' . $token->data . '-->';
} else {
return '';
* @warning This runs into problems if there's already a literal
* --> somewhere inside the script contents.
*/
- function generateScriptFromToken($token) {
- if ($token->type != 'text') return $this->generateFromToken($token);
- // return '<!--' . "\n" . trim($token->data) . "\n" . '// -->';
- // more advanced version:
- // thanks <http://lachy.id.au/log/2005/05/script-comments>
+ public function generateScriptFromToken($token) {
+ if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); // only text tokens get the comment/CDATA wrapper; anything else is rendered normally
+ // Thanks <http://lachy.id.au/log/2005/05/script-comments>
$data = preg_replace('#//\s*$#', '', $token->data);
return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
}
/**
* Generates attribute declarations from attribute array.
+ * @note This does not include the leading or trailing space.
* @param $assoc_array_of_attributes Attribute array
+ * @param $element Name of element attributes are for, used to check
+ * attribute minimization.
* @return Generate HTML fragment for insertion.
*/
- function generateAttributes($assoc_array_of_attributes, $element) {
+ public function generateAttributes($assoc_array_of_attributes, $element = false) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
if (!$this->_xhtml) {
- // remove namespaced attributes
+ // Remove namespaced attributes
if (strpos($key, ':') !== false) continue;
- if (!empty($this->_def->info[$element]->attr[$key]->minimized)) {
+ // Check if we should minimize the attribute: val="val" -> val
+ if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
$html .= $key . ' ';
continue;
}
/**
* Escapes raw text data.
+ * @todo This really ought to be protected, but until we have a facility
+ * for properly generating HTML here w/o using tokens, it stays
+ * public.
* @param $string String data to escape for HTML.
+ * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
+ * permissible for non-attribute output.
* @return String escaped data.
*/
- function escape($string) {
- return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
+ public function escape($string, $quote = ENT_COMPAT) {
+ return htmlspecialchars($string, $quote, 'UTF-8');
}
}
<?php
-require_once 'HTMLPurifier/Definition.php';
-require_once 'HTMLPurifier/HTMLModuleManager.php';
-
-// this definition and its modules MUST NOT define configuration directives
-// outside of the HTML or Attr namespaces
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'DefinitionID', null, 'string/null', '
-<p>
- Unique identifier for a custom-built HTML definition. If you edit
- the raw version of the HTMLDefinition, introducing changes that the
- configuration object does not reflect, you must specify this variable.
- If you change your custom edits, you should change this directive, or
- clear your cache. Example:
-</p>
-<pre>
-$config = HTMLPurifier_Config::createDefault();
-$config->set(\'HTML\', \'DefinitionID\', \'1\');
-$def = $config->getHTMLDefinition();
-$def->addAttribute(\'a\', \'tabindex\', \'Number\');
-</pre>
-<p>
- In the above example, the configuration is still at the defaults, but
- using the advanced API, an extra attribute has been added. The
- configuration object normally has no way of knowing that this change
- has taken place, so it needs an extra directive: %HTML.DefinitionID.
- If someone else attempts to use the default configuration, these two
- pieces of code will not clobber each other in the cache, since one has
- an extra directive attached to it.
-</p>
-<p>
- This directive has been available since 2.0.0, and in that version or
- later you <em>must</em> specify a value to this directive to use the
- advanced API features.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'DefinitionRev', 1, 'int', '
-<p>
- Revision identifier for your custom definition specified in
- %HTML.DefinitionID. This serves the same purpose: uniquely identifying
- your custom definition, but this one does so in a chronological
- context: revision 3 is more up-to-date then revision 2. Thus, when
- this gets incremented, the cache handling is smart enough to clean
- up any older revisions of your definition as well as flush the
- cache. This directive has been available since 2.0.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'BlockWrapper', 'p', 'string', '
-<p>
- String name of element to wrap inline elements that are inside a block
- context. This only occurs in the children of blockquote in strict mode.
-</p>
-<p>
- Example: by default value,
- <code><blockquote>Foo</blockquote></code> would become
- <code><blockquote><p>Foo</p></blockquote></code>.
- The <code><p></code> tags can be replaced with whatever you desire,
- as long as it is a block level element. This directive has been available
- since 1.3.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Parent', 'div', 'string', '
-<p>
- String name of element that HTML fragment passed to library will be
- inserted in. An interesting variation would be using span as the
- parent element, meaning that only inline tags would be allowed.
- This directive has been available since 1.3.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedElements', null, 'lookup/null', '
-<p>
- If HTML Purifier\'s tag set is unsatisfactory for your needs, you
- can overload it with your own list of tags to allow. Note that this
- method is subtractive: it does its job by taking away from HTML Purifier
- usual feature set, so you cannot add a tag that HTML Purifier never
- supported in the first place (like embed, form or head). If you
- change this, you probably also want to change %HTML.AllowedAttributes.
-</p>
-<p>
- <strong>Warning:</strong> If another directive conflicts with the
- elements here, <em>that</em> directive will win and override.
- This directive has been available since 1.3.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedAttributes', null, 'lookup/null', '
-<p>
- If HTML Purifier\'s attribute set is unsatisfactory, overload it!
- The syntax is "tag.attr" or "*.attr" for the global attributes
- (style, id, class, dir, lang, xml:lang).
-</p>
-<p>
- <strong>Warning:</strong> If another directive conflicts with the
- elements here, <em>that</em> directive will win and override. For
- example, %HTML.EnableAttrID will take precedence over *.id in this
- directive. You must set that directive to true before you can use
- IDs at all. This directive has been available since 1.3.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Allowed', null, 'itext/null', '
-<p>
- This is a convenience directive that rolls the functionality of
- %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
- Specify elements and attributes that are allowed using:
- <code>element1[attr1|attr2],element2...</code>. You can also use
- newlines instead of commas to separate elements.
-</p>
-<p>
- <strong>Warning</strong>:
- All of the constraints on the component directives are still enforced.
- The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
- whitelist: directly copy-pasting it here will probably result in
- broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
- are set, this directive has no effect.
- This directive has been available since 2.0.0.
-</p>
-');
-
/**
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.
*
* @note This class is inspected by Printer_HTMLDefinition; please
* update that class if things here change.
+ *
+ * @warning Directives that change this object's structure must be in
+ * the HTML or Attr namespace!
*/
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{
/**
* Associative array of element names to HTMLPurifier_ElementDef
- * @public
*/
- var $info = array();
+ public $info = array();
/**
* Associative array of global attribute name to attribute definition.
- * @public
*/
- var $info_global_attr = array();
+ public $info_global_attr = array();
/**
* String name of parent element HTML will be going into.
- * @public
*/
- var $info_parent = 'div';
+ public $info_parent = 'div';
/**
* Definition for parent element, allows parent element to be a
* tag that's not allowed inside the HTML fragment.
- * @public
*/
- var $info_parent_def;
+ public $info_parent_def;
/**
* String name of element used to wrap inline elements in block context
* @note This is rarely used except for BLOCKQUOTEs in strict mode
- * @public
*/
- var $info_block_wrapper = 'p';
+ public $info_block_wrapper = 'p';
/**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
- * @public
*/
- var $info_tag_transform = array();
+ public $info_tag_transform = array();
/**
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
- * @public
*/
- var $info_attr_transform_pre = array();
+ public $info_attr_transform_pre = array();
/**
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
- * @public
*/
- var $info_attr_transform_post = array();
+ public $info_attr_transform_post = array();
/**
* Nested lookup array of content set name (Block, Inline) to
* element name to whether or not it belongs in that content set.
- * @public
*/
- var $info_content_sets = array();
+ public $info_content_sets = array();
/**
* Doctype object
*/
- var $doctype;
+ public $doctype;
* @param $def Attribute definition, can be string or object, see
* HTMLPurifier_AttrTypes for details
*/
- function addAttribute($element_name, $attr_name, $def) {
- $module =& $this->getAnonymousModule();
+ public function addAttribute($element_name, $attr_name, $def) {
+ $module = $this->getAnonymousModule();
if (!isset($module->info[$element_name])) {
- $element =& $module->addBlankElement($element_name);
+ $element = $module->addBlankElement($element_name);
} else {
- $element =& $module->info[$element_name];
+ $element = $module->info[$element_name];
}
$element->attr[$attr_name] = $def;
}
* @note See HTMLPurifier_HTMLModule::addElement for detailed
* parameter and return value descriptions.
*/
- function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
- $module =& $this->getAnonymousModule();
+ public function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
+ $module = $this->getAnonymousModule();
// assume that if the user is calling this, the element
// is safe. This may not be a good idea
- $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+ $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
return $element;
}
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
* parameter and return value descriptions.
*/
- function &addBlankElement($element_name) {
- $module =& $this->getAnonymousModule();
- $element =& $module->addBlankElement($element_name);
+ public function addBlankElement($element_name) {
+ $module = $this->getAnonymousModule();
+ $element = $module->addBlankElement($element_name);
return $element;
}
* bust out advanced features without having to make your own
* module.
*/
- function &getAnonymousModule() {
+ public function getAnonymousModule() {
if (!$this->_anonModule) {
$this->_anonModule = new HTMLPurifier_HTMLModule();
$this->_anonModule->name = 'Anonymous';
return $this->_anonModule;
}
- var $_anonModule;
+ private $_anonModule;
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
- var $type = 'HTML';
- var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
+ public $type = 'HTML';
+ public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
* Performs low-cost, preliminary initialization.
*/
- function HTMLPurifier_HTMLDefinition() {
+ public function __construct() {
$this->manager = new HTMLPurifier_HTMLModuleManager();
}
- function doSetup($config) {
+ protected function doSetup($config) {
$this->processModules($config);
$this->setupConfigStuff($config);
unset($this->manager);
/**
* Extract out the information from the manager
*/
- function processModules($config) {
+ protected function processModules($config) {
if ($this->_anonModule) {
// for user specific changes
/**
* Sets up stuff based on config. We need a better way of doing this.
*/
- function setupConfigStuff($config) {
+ protected function setupConfigStuff($config) {
$block_wrapper = $config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$support = "(for information on implementing this, see the ".
"support forums) ";
- // setup allowed elements
+ // setup allowed elements -----------------------------------------
$allowed_elements = $config->get('HTML', 'AllowedElements');
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+ $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early
if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
$allowed = $config->get('HTML', 'Allowed');
}
// emit errors
foreach ($allowed_elements as $element => $d) {
- $element = htmlspecialchars($element);
+ $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
}
}
+ // setup allowed attributes ---------------------------------------
+
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
- foreach ($this->info_global_attr as $attr_key => $info) {
- if (!isset($allowed_attributes["*.$attr_key"])) {
- unset($this->info_global_attr[$attr_key]);
- } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
- unset($allowed_attributes_mutable["*.$attr_key"]);
+
+ // This actually doesn't do anything, since we went away from
+ // global attributes. It's possible that userland code uses
+ // it, but HTMLModuleManager doesn't!
+ foreach ($this->info_global_attr as $attr => $x) {
+ $keys = array($attr, "*@$attr", "*.$attr");
+ $delete = true;
+ foreach ($keys as $key) {
+ if ($delete && isset($allowed_attributes[$key])) {
+ $delete = false;
+ }
+ if (isset($allowed_attributes_mutable[$key])) {
+ unset($allowed_attributes_mutable[$key]);
+ }
}
+ if ($delete) unset($this->info_global_attr[$attr]);
}
+
foreach ($this->info as $tag => $info) {
- foreach ($info->attr as $attr => $attr_info) {
- if (!isset($allowed_attributes["$tag.$attr"]) &&
- !isset($allowed_attributes["*.$attr"])) {
- unset($this->info[$tag]->attr[$attr]);
- } else {
- if (isset($allowed_attributes_mutable["$tag.$attr"])) {
- unset($allowed_attributes_mutable["$tag.$attr"]);
- } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
- unset($allowed_attributes_mutable["*.$attr"]);
+ foreach ($info->attr as $attr => $x) {
+ $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
+ $delete = true;
+ foreach ($keys as $key) {
+ if ($delete && isset($allowed_attributes[$key])) {
+ $delete = false;
+ }
+ if (isset($allowed_attributes_mutable[$key])) {
+ unset($allowed_attributes_mutable[$key]);
}
}
+ if ($delete) unset($this->info[$tag]->attr[$attr]);
}
}
// emit errors
foreach ($allowed_attributes_mutable as $elattr => $d) {
- list($element, $attribute) = explode('.', $elattr);
- $element = htmlspecialchars($element);
- $attribute = htmlspecialchars($attribute);
- if ($element == '*') {
- trigger_error("Global attribute '$attribute' is not ".
- "supported in any elements $support",
- E_USER_WARNING);
- } else {
- trigger_error("Attribute '$attribute' in element '$element' not supported $support",
- E_USER_WARNING);
+ $bits = preg_split('/[.@]/', $elattr, 2);
+ $c = count($bits);
+ switch ($c) {
+ case 2:
+ if ($bits[0] !== '*') {
+     // Escaped copies are for display only: PHP does not escape
+     // error output, so raw configuration text must not reach it.
+     $element = htmlspecialchars($bits[0]);
+     $attribute = htmlspecialchars($bits[1]);
+     // Look the element up by its raw name, not the escaped one.
+     if (!isset($this->info[$bits[0]])) {
+         // Reported as a warning, consistent with the other
+         // unsupported element/attribute diagnostics in this method.
+         trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support", E_USER_WARNING);
+     } else {
+         trigger_error("Attribute '$attribute' in element '$element' not supported $support",
+             E_USER_WARNING);
+     }
+     break;
+ }
+ // otherwise fall through
+ case 1:
+ $attribute = htmlspecialchars($bits[0]);
+ trigger_error("Global attribute '$attribute' is not ".
+ "supported in any elements $support",
+ E_USER_WARNING);
+ break;
+ }
+ }
+
+ }
+
+ // setup forbidden elements ---------------------------------------
+
+ $forbidden_elements = $config->get('HTML', 'ForbiddenElements');
+ $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');
+
+ foreach ($this->info as $tag => $info) {
+ if (isset($forbidden_elements[$tag])) {
+ unset($this->info[$tag]);
+ continue;
+ }
+ foreach ($info->attr as $attr => $x) {
+ if (
+ isset($forbidden_attributes["$tag@$attr"]) ||
+ isset($forbidden_attributes["*@$attr"]) ||
+ isset($forbidden_attributes[$attr])
+ ) {
+ unset($this->info[$tag]->attr[$attr]);
+ continue;
+ } // this segment might get removed eventually
+ elseif (isset($forbidden_attributes["$tag.$attr"])) {
+ // $tag.$attr are not user supplied, so no worries!
+ trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
}
}
}
+ foreach ($forbidden_attributes as $key => $v) {
+     // PHP converts numeric-string array keys to integers; normalise
+     // so the character offsets below are always read from a string.
+     $key = (string) $key;
+     if (strlen($key) < 2) continue;
+     if ($key[0] !== '*') continue;
+     if ($key[1] === '.') {
+         // $key comes straight from configuration and PHP does not
+         // escape error output, so escape it before embedding it.
+         $key = htmlspecialchars($key);
+         trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
+     }
+ }
}
* it is different, and you'll probably have to modify your lists
* @param $list String list to parse
* @param array($allowed_elements, $allowed_attributes)
+ * @todo Give this its own class, probably static interface
*/
- function parseTinyMCEAllowedList($list) {
+ public function parseTinyMCEAllowedList($list) {
+
+ $list = str_replace(array(' ', "\t"), '', $list);
$elements = array();
$attributes = array();
* correspond to the variables in HTMLPurifier_HTMLDefinition.
* However, the prefix info carries no special meaning in these
* objects (include it anyway if that's the correspondence though).
+ * @todo Consider making some member functions protected
*/
class HTMLPurifier_HTMLModule
/**
* Short unique string identifier of the module
*/
- var $name;
+ public $name;
/**
* Informally, a list of elements this module changes. Not used in
* any significant way.
- * @protected
*/
- var $elements = array();
+ public $elements = array();
/**
* Associative array of element names to element definitions.
* Some definitions may be incomplete, to be merged in later
* with the full definition.
- * @public
*/
- var $info = array();
+ public $info = array();
/**
* Associative array of content set names to content set additions.
* This is commonly used to, say, add an A element to the Inline
* content set. This corresponds to an internal variable $content_sets
* and NOT info_content_sets member variable of HTMLDefinition.
- * @public
*/
- var $content_sets = array();
+ public $content_sets = array();
/**
* Associative array of attribute collection names to attribute
* the style attribute to the Core. Corresponds to HTMLDefinition's
* attr_collections->info, since the object's data is only info,
* with extra behavior associated with it.
- * @public
*/
- var $attr_collections = array();
+ public $attr_collections = array();
/**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
- * @public
*/
- var $info_tag_transform = array();
+ public $info_tag_transform = array();
/**
* List of HTMLPurifier_AttrTransform to be performed before validation.
- * @public
*/
- var $info_attr_transform_pre = array();
+ public $info_attr_transform_pre = array();
/**
* List of HTMLPurifier_AttrTransform to be performed after validation.
- * @public
*/
- var $info_attr_transform_post = array();
+ public $info_attr_transform_post = array();
/**
* Boolean flag that indicates whether or not getChildDef is implemented.
* For optimization reasons: may save a call to a function. Be sure
* to set it if you do implement getChildDef(), otherwise it will have
* no effect!
- * @public
*/
- var $defines_child_def = false;
+ public $defines_child_def = false;
+
+ /**
+ * Boolean flag whether or not this module is safe. If it is not safe, all
+ * of its members are unsafe. Modules are safe by default (this might be
+ * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
+ * which is based off of safe HTML, to explicitly say, "This is safe," even
+ * though there are modules which are "unsafe")
+ *
+ * @note Previously, safety could be applied at an element level granularity.
+ * We've removed this ability, so in order to add "unsafe" elements
+ * or attributes, a dedicated module with this property set to false
+ * must be used.
+ */
+ public $safe = true;
/**
* Retrieves a proper HTMLPurifier_ChildDef subclass based on
* in HTMLPurifier_HTMLDefinition.
* @param $def HTMLPurifier_ElementDef instance
* @return HTMLPurifier_ChildDef subclass
- * @public
*/
- function getChildDef($def) {return false;}
+ public function getChildDef($def) {return false;}
// -- Convenience -----------------------------------------------------
/**
* Convenience function that sets up a new element
* @param $element Name of element to add
- * @param $safe Is element safe for untrusted users to use?
* @param $type What content set should element be registered to?
* Set as false to skip this step.
* @param $contents Allowed children in form of:
* element?
* @param $attr What unique attributes does the element define?
* @note See ElementDef for in-depth descriptions of these parameters.
- * @return Reference to created element definition object, so you
+ * @return Created element definition object, so you
* can set advanced parameters
- * @protected
*/
- function &addElement($element, $safe, $type, $contents, $attr_includes = array(), $attr = array()) {
+ public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
$this->elements[] = $element;
// parse content_model
list($content_model_type, $content_model) = $this->parseContents($contents);
if ($type) $this->addElementToContentSet($element, $type);
// create element
$this->info[$element] = HTMLPurifier_ElementDef::create(
- $safe, $content_model, $content_model_type, $attr
+ $content_model, $content_model_type, $attr
);
// literal object $contents means direct child manipulation
if (!is_string($contents)) $this->info[$element]->child = $contents;
* Convenience function that creates a totally blank, non-standalone
* element.
* @param $element Name of element to create
- * @return Reference to created element
+ * @return Created element
*/
- function &addBlankElement($element) {
+ public function addBlankElement($element) {
if (!isset($this->info[$element])) {
$this->elements[] = $element;
$this->info[$element] = new HTMLPurifier_ElementDef();
* @param Element to register
* @param Name content set (warning: case sensitive, usually upper-case
* first letter)
- * @protected
*/
- function addElementToContentSet($element, $type) {
+ public function addElementToContentSet($element, $type) {
if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
else $this->content_sets[$type] .= ' | ';
$this->content_sets[$type] .= $element;
* returned, and the callee needs to take the original $contents
* and use it directly.
*/
- function parseContents($contents) {
+ public function parseContents($contents) {
if (!is_string($contents)) return array(null, null); // defer
switch ($contents) {
// check for shorthand content model forms
* @param $attr Reference to attr array to modify
* @param $attr_includes Array of includes / string include to merge in
*/
- function mergeInAttrIncludes(&$attr, $attr_includes) {
+ public function mergeInAttrIncludes(&$attr, $attr_includes) {
if (!is_array($attr_includes)) {
if (empty($attr_includes)) $attr_includes = array();
else $attr_includes = array($attr_includes);
* place of the regular argument
* @return Lookup array equivalent of list
*/
- function makeLookup($list) {
+ public function makeLookup($list) {
if (is_string($list)) $list = func_get_args();
$ret = array();
foreach ($list as $value) {
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
-
/**
* XHTML 1.1 Bi-directional Text Module, defines elements that
* declare directionality of content. Text Extension Module.
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{
- var $name = 'Bdo';
- var $attr_collections = array(
+ public $name = 'Bdo';
+ public $attr_collections = array(
'I18N' => array('dir' => false)
);
- function HTMLPurifier_HTMLModule_Bdo() {
- $bdo =& $this->addElement(
- 'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
+ public function __construct() {
+ $bdo = $this->addElement(
+ 'bdo', 'Inline', 'Inline', array('Core', 'Lang'),
array(
'dir' => 'Enum#ltr,rtl', // required
// The Abstract Module specification has the attribute
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
- var $name = 'CommonAttributes';
+ public $name = 'CommonAttributes';
- var $attr_collections = array(
+ public $attr_collections = array(
'Core' => array(
0 => array('Style'),
// 'xml:space' => false,
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/ChildDef/Chameleon.php';
-
/**
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
* Module.
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{
- var $name = 'Edit';
+ public $name = 'Edit';
- function HTMLPurifier_HTMLModule_Edit() {
+ public function __construct() {
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
$attr = array(
'cite' => 'URI',
// 'datetime' => 'Datetime', // not implemented
);
- $this->addElement('del', true, 'Inline', $contents, 'Common', $attr);
- $this->addElement('ins', true, 'Inline', $contents, 'Common', $attr);
+ $this->addElement('del', 'Inline', $contents, 'Common', $attr);
+ $this->addElement('ins', 'Inline', $contents, 'Common', $attr);
}
// HTML 4.01 specifies that ins/del must not contain block
// Inline context ! Block context (exclamation mark is
// separator, see getChildDef for parsing)
- var $defines_child_def = true;
- function getChildDef($def) {
+ public $defines_child_def = true;
+ public function getChildDef($def) {
if ($def->content_model_type != 'chameleon') return false;
$value = explode('!', $def->content_model);
return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]);
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
-
/**
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
*/
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{
- var $name = 'Hypertext';
+ public $name = 'Hypertext';
- function HTMLPurifier_HTMLModule_Hypertext() {
- $a =& $this->addElement(
- 'a', true, 'Inline', 'Inline', 'Common',
+ public function __construct() {
+ $a = $this->addElement(
+ 'a', 'Inline', 'Inline', 'Common',
array(
// 'accesskey' => 'Character',
// 'charset' => 'Charset',
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
-require_once 'HTMLPurifier/AttrDef/URI.php';
-require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
-
/**
* XHTML 1.1 Image Module provides basic image embedding.
* @note There is specialized code for removing empty images in
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{
- var $name = 'Image';
+ public $name = 'Image';
- function HTMLPurifier_HTMLModule_Image() {
- $img =& $this->addElement(
- 'img', true, 'Inline', 'Empty', 'Common',
+ public function __construct() {
+ $img = $this->addElement(
+ 'img', 'Inline', 'Empty', 'Common',
array(
'alt*' => 'Text',
'height' => 'Length',
<?php
-require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
-
/**
* XHTML 1.1 Legacy module defines elements that were previously
* deprecated.
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{
- // incomplete
-
- var $name = 'Legacy';
+ public $name = 'Legacy';
- function HTMLPurifier_HTMLModule_Legacy() {
+ public function __construct() {
- $this->addElement('basefont', true, 'Inline', 'Empty', false, array(
+ $this->addElement('basefont', 'Inline', 'Empty', false, array(
'color' => 'Color',
'face' => 'Text', // extremely broad, we should
'size' => 'Text', // tighten it
'id' => 'ID'
));
- $this->addElement('center', true, 'Block', 'Flow', 'Common');
- $this->addElement('dir', true, 'Block', 'Required: li', 'Common', array(
+ $this->addElement('center', 'Block', 'Flow', 'Common');
+ $this->addElement('dir', 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact'
));
- $this->addElement('font', true, 'Inline', 'Inline', array('Core', 'I18N'), array(
+ $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array(
'color' => 'Color',
'face' => 'Text', // extremely broad, we should
'size' => 'Text', // tighten it
));
- $this->addElement('menu', true, 'Block', 'Required: li', 'Common', array(
+ $this->addElement('menu', 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact'
));
- $this->addElement('s', true, 'Inline', 'Inline', 'Common');
- $this->addElement('strike', true, 'Inline', 'Inline', 'Common');
- $this->addElement('u', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('s', 'Inline', 'Inline', 'Common');
+ $this->addElement('strike', 'Inline', 'Inline', 'Common');
+ $this->addElement('u', 'Inline', 'Inline', 'Common');
// setup modifications to old elements
$align = 'Enum#left,right,center,justify';
- $address =& $this->addBlankElement('address');
+ $address = $this->addBlankElement('address');
$address->content_model = 'Inline | #PCDATA | p';
$address->content_model_type = 'optional';
$address->child = false;
- $blockquote =& $this->addBlankElement('blockquote');
+ $blockquote = $this->addBlankElement('blockquote');
$blockquote->content_model = 'Flow | #PCDATA';
$blockquote->content_model_type = 'optional';
$blockquote->child = false;
- $br =& $this->addBlankElement('br');
+ $br = $this->addBlankElement('br');
$br->attr['clear'] = 'Enum#left,all,right,none';
- $caption =& $this->addBlankElement('caption');
+ $caption = $this->addBlankElement('caption');
$caption->attr['align'] = 'Enum#top,bottom,left,right';
- $div =& $this->addBlankElement('div');
+ $div = $this->addBlankElement('div');
$div->attr['align'] = $align;
- $dl =& $this->addBlankElement('dl');
+ $dl = $this->addBlankElement('dl');
$dl->attr['compact'] = 'Bool#compact';
for ($i = 1; $i <= 6; $i++) {
- $h =& $this->addBlankElement("h$i");
+ $h = $this->addBlankElement("h$i");
$h->attr['align'] = $align;
}
- $hr =& $this->addBlankElement('hr');
+ $hr = $this->addBlankElement('hr');
$hr->attr['align'] = $align;
$hr->attr['noshade'] = 'Bool#noshade';
$hr->attr['size'] = 'Pixels';
$hr->attr['width'] = 'Length';
- $img =& $this->addBlankElement('img');
+ $img = $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
$img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels';
// figure out this integer business
- $li =& $this->addBlankElement('li');
+ $li = $this->addBlankElement('li');
$li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle';
- $ol =& $this->addBlankElement('ol');
+ $ol = $this->addBlankElement('ol');
$ol->attr['compact'] = 'Bool#compact';
$ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
$ol->attr['type'] = 'Enum#s:1,i,I,a,A';
- $p =& $this->addBlankElement('p');
+ $p = $this->addBlankElement('p');
$p->attr['align'] = $align;
- $pre =& $this->addBlankElement('pre');
+ $pre = $this->addBlankElement('pre');
$pre->attr['width'] = 'Number';
// script omitted
- $table =& $this->addBlankElement('table');
+ $table = $this->addBlankElement('table');
$table->attr['align'] = 'Enum#left,center,right';
$table->attr['bgcolor'] = 'Color';
- $tr =& $this->addBlankElement('tr');
+ $tr = $this->addBlankElement('tr');
$tr->attr['bgcolor'] = 'Color';
- $th =& $this->addBlankElement('th');
+ $th = $this->addBlankElement('th');
$th->attr['bgcolor'] = 'Color';
$th->attr['height'] = 'Length';
$th->attr['nowrap'] = 'Bool#nowrap';
$th->attr['width'] = 'Length';
- $td =& $this->addBlankElement('td');
+ $td = $this->addBlankElement('td');
$td->attr['bgcolor'] = 'Color';
$td->attr['height'] = 'Length';
$td->attr['nowrap'] = 'Bool#nowrap';
$td->attr['width'] = 'Length';
- $ul =& $this->addBlankElement('ul');
+ $ul = $this->addBlankElement('ul');
$ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle';
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
/**
* XHTML 1.1 List Module, defines list-oriented elements. Core Module.
*/
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
- var $name = 'List';
+ public $name = 'List';
// According to the abstract schema, the List content set is a fully formed
// one or more expr, but it invariably occurs in an optional declaration
// we don't have support for such nested expressions without using
// the incredibly inefficient and draconic Custom ChildDef.
- var $content_sets = array('Flow' => 'List');
+ public $content_sets = array('Flow' => 'List');
- function HTMLPurifier_HTMLModule_List() {
- $this->addElement('ol', true, 'List', 'Required: li', 'Common');
- $this->addElement('ul', true, 'List', 'Required: li', 'Common');
- $this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
+ public function __construct() {
+ $this->addElement('ol', 'List', 'Required: li', 'Common');
+ $this->addElement('ul', 'List', 'Required: li', 'Common');
+ $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
- $this->addElement('li', true, false, 'Flow', 'Common');
+ $this->addElement('li', false, 'Flow', 'Common');
- $this->addElement('dd', true, false, 'Flow', 'Common');
- $this->addElement('dt', true, false, 'Inline', 'Common');
+ $this->addElement('dd', false, 'Flow', 'Common');
+ $this->addElement('dt', false, 'Inline', 'Common');
}
}
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{
- var $name = 'NonXMLCommonAttributes';
+ public $name = 'NonXMLCommonAttributes';
- var $attr_collections = array(
+ public $attr_collections = array(
'Lang' => array(
'lang' => 'LanguageCode',
)
<?php\r
\r
-require_once 'HTMLPurifier/HTMLModule.php';\r
-\r
/**\r
* XHTML 1.1 Object Module, defines elements for generic object inclusion\r
* @warning Users will commonly use <embed> to cater to legacy browsers: this\r
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule\r
{\r
\r
- var $name = 'Object';\r
+ public $name = 'Object';\r
+ public $safe = false;\r
\r
- function HTMLPurifier_HTMLModule_Object() {\r
+ public function __construct() {\r
\r
- $this->addElement('object', false, 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', \r
+ $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', \r
array(\r
'archive' => 'URI',\r
'classid' => 'URI',\r
)\r
);\r
\r
- $this->addElement('param', false, false, 'Empty', false,\r
+ $this->addElement('param', false, 'Empty', false,\r
array(\r
'id' => 'ID',\r
'name*' => 'Text',\r
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
/**
* XHTML 1.1 Presentation Module, defines simple presentation-related
* markup. Text Extension Module.
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{
- var $name = 'Presentation';
+ public $name = 'Presentation';
- function HTMLPurifier_HTMLModule_Presentation() {
- $this->addElement('b', true, 'Inline', 'Inline', 'Common');
- $this->addElement('big', true, 'Inline', 'Inline', 'Common');
- $this->addElement('hr', true, 'Block', 'Empty', 'Common');
- $this->addElement('i', true, 'Inline', 'Inline', 'Common');
- $this->addElement('small', true, 'Inline', 'Inline', 'Common');
- $this->addElement('sub', true, 'Inline', 'Inline', 'Common');
- $this->addElement('sup', true, 'Inline', 'Inline', 'Common');
- $this->addElement('tt', true, 'Inline', 'Inline', 'Common');
+ public function __construct() {
+ $this->addElement('b', 'Inline', 'Inline', 'Common');
+ $this->addElement('big', 'Inline', 'Inline', 'Common');
+ $this->addElement('hr', 'Block', 'Empty', 'Common');
+ $this->addElement('i', 'Inline', 'Inline', 'Common');
+ $this->addElement('small', 'Inline', 'Inline', 'Common');
+ $this->addElement('sub', 'Inline', 'Inline', 'Common');
+ $this->addElement('sup', 'Inline', 'Inline', 'Common');
+ $this->addElement('tt', 'Inline', 'Inline', 'Common');
}
}
--- /dev/null
+<?php
+
+/**
+ * HTML module for vendor-specific (non-standard) tags and attributes.
+ *
+ * At present the only element it contributes is <marquee>.
+ * @warning Enabling this module means the output is no longer
+ *          standards-compliant!
+ */
+class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
+{
+
+    public $name = 'Proprietary';
+
+    /**
+     * Registers the <marquee> element together with its attribute table.
+     */
+    public function __construct() {
+
+        // attribute name => attribute type string, in registration order
+        $marquee_attr = array(
+            'direction' => 'Enum#left,right,up,down',
+            'behavior' => 'Enum#alternate',
+            'width' => 'Length',
+            'height' => 'Length',
+            'scrolldelay' => 'Number',
+            'scrollamount' => 'Number',
+            'loop' => 'Number',
+            'bgcolor' => 'Color',
+            'hspace' => 'Pixels',
+            'vspace' => 'Pixels',
+        );
+
+        $this->addElement('marquee', 'Inline', 'Flow', 'Common', $marquee_attr);
+
+    }
+
+}
+
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
/**
* XHTML 1.1 Ruby Annotation Module, defines elements that indicate
* short runs of text alongside base text for annotation or pronounciation.
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{
- var $name = 'Ruby';
+ public $name = 'Ruby';
- function HTMLPurifier_HTMLModule_Ruby() {
- $this->addElement('ruby', true, 'Inline',
+ public function __construct() {
+ $this->addElement('ruby', 'Inline',
'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
'Common');
- $this->addElement('rbc', true, false, 'Required: rb', 'Common');
- $this->addElement('rtc', true, false, 'Required: rt', 'Common');
- $rb =& $this->addElement('rb', true, false, 'Inline', 'Common');
+ $this->addElement('rbc', false, 'Required: rb', 'Common');
+ $this->addElement('rtc', false, 'Required: rt', 'Common');
+ $rb = $this->addElement('rb', false, 'Inline', 'Common');
$rb->excludes = array('ruby' => true);
- $rt =& $this->addElement('rt', true, false, 'Inline', 'Common', array('rbspan' => 'Number'));
+ $rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
$rt->excludes = array('ruby' => true);
- $this->addElement('rp', true, false, 'Optional: #PCDATA', 'Common');
+ $this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
}
}
*/
-/**
- * Implements required attribute stipulation for <script>
- */
-class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
-{
- function transform($attr, $config, &$context) {
- if (!isset($attr['type'])) {
- $attr['type'] = 'text/javascript';
- }
- return $attr;
- }
-}
-
/**
* XHTML 1.1 Scripting module, defines elements that are used to contain
* information pertaining to executable scripts or the lack of support
*/
class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
{
- var $name = 'Scripting';
- var $elements = array('script', 'noscript');
- var $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
+ public $name = 'Scripting';
+ public $elements = array('script', 'noscript');
+ public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
+ public $safe = false;
- function HTMLPurifier_HTMLModule_Scripting() {
+ public function __construct() {
// TODO: create custom child-definition for noscript that
// auto-wraps stray #PCDATA in a similar manner to
// blockquote's custom definition (we would use it but
// TODO: convert this to new syntax, main problem is getting
// both content sets working
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- $this->info[$element]->safe = false;
- }
+
+ // In theory, this could be safe, but I don't see any reason to
+ // allow it.
+ $this->info['noscript'] = new HTMLPurifier_ElementDef();
$this->info['noscript']->attr = array( 0 => array('Common') );
$this->info['noscript']->content_model = 'Heading | List | Block';
$this->info['noscript']->content_model_type = 'required';
+
+ $this->info['script'] = new HTMLPurifier_ElementDef();
$this->info['script']->attr = array(
'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')),
'src' => new HTMLPurifier_AttrDef_URI(true),
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/AttrDef/CSS.php';
-
/**
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
* Module.
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
- var $name = 'StyleAttribute';
- var $attr_collections = array(
+ public $name = 'StyleAttribute';
+ public $attr_collections = array(
// The inclusion routine differs from the Abstract Modules but
// is in line with the DTD and XML Schemas.
'Style' => array('style' => false), // see constructor
'Core' => array(0 => array('Style'))
);
- function HTMLPurifier_HTMLModule_StyleAttribute() {
+ public function __construct() {
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
}
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/ChildDef/Table.php';
-
/**
* XHTML 1.1 Tables Module, fully defines accessible table elements.
*/
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
- var $name = 'Tables';
+ public $name = 'Tables';
- function HTMLPurifier_HTMLModule_Tables() {
+ public function __construct() {
- $this->addElement('caption', true, false, 'Inline', 'Common');
+ $this->addElement('caption', false, 'Inline', 'Common');
- $this->addElement('table', true, 'Block',
+ $this->addElement('table', 'Block',
new HTMLPurifier_ChildDef_Table(), 'Common',
array(
'border' => 'Pixels',
),
$cell_align
);
- $this->addElement('td', true, false, 'Flow', 'Common', $cell_t);
- $this->addElement('th', true, false, 'Flow', 'Common', $cell_t);
+ $this->addElement('td', false, 'Flow', 'Common', $cell_t);
+ $this->addElement('th', false, 'Flow', 'Common', $cell_t);
- $this->addElement('tr', true, false, 'Required: td | th', 'Common', $cell_align);
+ $this->addElement('tr', false, 'Required: td | th', 'Common', $cell_align);
$cell_col = array_merge(
array(
),
$cell_align
);
- $this->addElement('col', true, false, 'Empty', 'Common', $cell_col);
- $this->addElement('colgroup', true, false, 'Optional: col', 'Common', $cell_col);
+ $this->addElement('col', false, 'Empty', 'Common', $cell_col);
+ $this->addElement('colgroup', false, 'Optional: col', 'Common', $cell_col);
- $this->addElement('tbody', true, false, 'Required: tr', 'Common', $cell_align);
- $this->addElement('thead', true, false, 'Required: tr', 'Common', $cell_align);
- $this->addElement('tfoot', true, false, 'Required: tr', 'Common', $cell_align);
+ $this->addElement('tbody', false, 'Required: tr', 'Common', $cell_align);
+ $this->addElement('thead', false, 'Required: tr', 'Common', $cell_align);
+ $this->addElement('tfoot', false, 'Required: tr', 'Common', $cell_align);
}
<?php
-require_once 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
-
/**
* XHTML 1.1 Target Module, defines target attribute in link elements.
*/
class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
{
- var $name = 'Target';
+ public $name = 'Target';
- function HTMLPurifier_HTMLModule_Target() {
+ public function __construct() {
$elements = array('a');
foreach ($elements as $name) {
- $e =& $this->addBlankElement($name);
+ $e = $this->addBlankElement($name);
$e->attr = array(
'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget()
);
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
/**
* XHTML 1.1 Text Module, defines basic text containers. Core Module.
* @note In the normative XML Schema specification, this module
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
- var $name = 'Text';
- var $content_sets = array(
+ public $name = 'Text';
+ public $content_sets = array(
'Flow' => 'Heading | Block | Inline'
);
- function HTMLPurifier_HTMLModule_Text() {
+ public function __construct() {
// Inline Phrasal -------------------------------------------------
- $this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
- $this->addElement('acronym', true, 'Inline', 'Inline', 'Common');
- $this->addElement('cite', true, 'Inline', 'Inline', 'Common');
- $this->addElement('code', true, 'Inline', 'Inline', 'Common');
- $this->addElement('dfn', true, 'Inline', 'Inline', 'Common');
- $this->addElement('em', true, 'Inline', 'Inline', 'Common');
- $this->addElement('kbd', true, 'Inline', 'Inline', 'Common');
- $this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
- $this->addElement('samp', true, 'Inline', 'Inline', 'Common');
- $this->addElement('strong', true, 'Inline', 'Inline', 'Common');
- $this->addElement('var', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('abbr', 'Inline', 'Inline', 'Common');
+ $this->addElement('acronym', 'Inline', 'Inline', 'Common');
+ $this->addElement('cite', 'Inline', 'Inline', 'Common');
+ $this->addElement('code', 'Inline', 'Inline', 'Common');
+ $this->addElement('dfn', 'Inline', 'Inline', 'Common');
+ $this->addElement('em', 'Inline', 'Inline', 'Common');
+ $this->addElement('kbd', 'Inline', 'Inline', 'Common');
+ $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
+ $this->addElement('samp', 'Inline', 'Inline', 'Common');
+ $this->addElement('strong', 'Inline', 'Inline', 'Common');
+ $this->addElement('var', 'Inline', 'Inline', 'Common');
// Inline Structural ----------------------------------------------
- $this->addElement('span', true, 'Inline', 'Inline', 'Common');
- $this->addElement('br', true, 'Inline', 'Empty', 'Core');
+ $this->addElement('span', 'Inline', 'Inline', 'Common');
+ $this->addElement('br', 'Inline', 'Empty', 'Core');
// Moodle specific elements - start
- $this->addElement('nolink', true, 'Inline', 'Flow');
- $this->addElement('tex', true, 'Inline', 'Flow');
- $this->addElement('algebra', true, 'Inline', 'Flow');
- $this->addElement('lang', true, 'Inline', 'Flow', 'I18N');
+ $this->addElement('nolink', 'Inline', 'Flow');
+ $this->addElement('tex', 'Inline', 'Flow');
+ $this->addElement('algebra', 'Inline', 'Flow');
+ $this->addElement('lang', 'Inline', 'Flow', 'I18N');
// Moodle specific elements - end
// Block Phrasal --------------------------------------------------
- $this->addElement('address', true, 'Block', 'Inline', 'Common');
- $this->addElement('blockquote', true, 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
- $pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common');
+ $this->addElement('address', 'Block', 'Inline', 'Common');
+ $this->addElement('blockquote', 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
+ $pre = $this->addElement('pre', 'Block', 'Inline', 'Common');
$pre->excludes = $this->makeLookup(
'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
- $this->addElement('h1', true, 'Heading', 'Inline', 'Common');
- $this->addElement('h2', true, 'Heading', 'Inline', 'Common');
- $this->addElement('h3', true, 'Heading', 'Inline', 'Common');
- $this->addElement('h4', true, 'Heading', 'Inline', 'Common');
- $this->addElement('h5', true, 'Heading', 'Inline', 'Common');
- $this->addElement('h6', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h1', 'Heading', 'Inline', 'Common');
+ $this->addElement('h2', 'Heading', 'Inline', 'Common');
+ $this->addElement('h3', 'Heading', 'Inline', 'Common');
+ $this->addElement('h4', 'Heading', 'Inline', 'Common');
+ $this->addElement('h5', 'Heading', 'Inline', 'Common');
+ $this->addElement('h6', 'Heading', 'Inline', 'Common');
// Block Structural -----------------------------------------------
- $this->addElement('p', true, 'Block', 'Inline', 'Common');
- $this->addElement('div', true, 'Block', 'Flow', 'Common');
+ $this->addElement('p', 'Block', 'Inline', 'Common');
+ $this->addElement('div', 'Block', 'Flow', 'Common');
}
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'TidyLevel', 'medium', 'string', '
-<p>General level of cleanliness the Tidy module should enforce.
-There are four allowed values:</p>
-<dl>
- <dt>none</dt>
- <dd>No extra tidying should be done</dd>
- <dt>light</dt>
- <dd>Only fix elements that would be discarded otherwise due to
- lack of support in doctype</dd>
- <dt>medium</dt>
- <dd>Enforce best practices</dd>
- <dt>heavy</dt>
- <dd>Transform all deprecated elements and attributes to standards
- compliant equivalents</dd>
-</dl>
-<p>This directive has been available since 2.0.0</p>
-' );
-HTMLPurifier_ConfigSchema::defineAllowedValues(
- 'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy')
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'TidyAdd', array(), 'lookup', '
-Fixes to add to the default set of Tidy fixes as per your level. This
-directive has been available since 2.0.0.
-' );
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'TidyRemove', array(), 'lookup', '
-Fixes to remove from the default set of Tidy fixes as per your level. This
-directive has been available since 2.0.0.
-' );
-
/**
* Abstract class for a set of proprietary modules that clean up (tidy)
* poorly written HTML.
+ * @todo Figure out how to protect some of these methods/properties
*/
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
* List of supported levels. Index zero is a special case "no fixes"
* level.
*/
- var $levels = array(0 => 'none', 'light', 'medium', 'heavy');
+ public $levels = array(0 => 'none', 'light', 'medium', 'heavy');
/**
* Default level to place all fixes in. Disabled by default
*/
- var $defaultLevel = null;
+ public $defaultLevel = null;
/**
* Lists of fixes used by getFixesForLevel(). Format is:
* HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
*/
- var $fixesForLevel = array(
+ public $fixesForLevel = array(
'light' => array(),
'medium' => array(),
'heavy' => array()
* @todo Wildcard matching and error reporting when an added or
* subtracted fix has no effect.
*/
- function construct($config) {
+ public function construct($config) {
// create fixes, initialize fixesForLevel
$fixes = $this->makeFixes();
* @param $level String level identifier, see $levels for valid values
* @return Lookup up table of fixes
*/
- function getFixesForLevel($level) {
+ public function getFixesForLevel($level) {
if ($level == $this->levels[0]) {
return array();
}
* the fixes array. It may be custom overloaded, used in conjunction
* with $defaultLevel, or not used at all.
*/
- function makeFixesForLevel($fixes) {
+ public function makeFixesForLevel($fixes) {
if (!isset($this->defaultLevel)) return;
if (!isset($this->fixesForLevel[$this->defaultLevel])) {
trigger_error(
* based on a list of fixes passed to it
* @param $lookup Lookup table of fixes to activate
*/
- function populate($fixes) {
+ public function populate($fixes) {
foreach ($fixes as $name => $fix) {
// determine what the fix is for
list($type, $params) = $this->getFixType($name);
if (isset($params['element'])) {
$element = $params['element'];
if (empty($this->info[$element])) {
- $e =& $this->addBlankElement($element);
+ $e = $this->addBlankElement($element);
} else {
- $e =& $this->info[$element];
+ $e = $this->info[$element];
}
} else {
$type = "info_$type";
- $e =& $this;
+ $e = $this;
}
+ // PHP 5 parses $e->$type[$attr] as $e->{$type[$attr]} (the index
+ // binds to $type first), so take a reference to the property and
+ // index that instead.
$f =& $e->$type;
$f[$attr] = $fix;
break;
case 'content_model_type':
$element = $params['element'];
if (empty($this->info[$element])) {
- $e =& $this->addBlankElement($element);
+ $e = $this->addBlankElement($element);
} else {
- $e =& $this->info[$element];
+ $e = $this->info[$element];
}
$e->$type = $fix;
break;
* @note $fix_parameters is type dependant, see populate() for usage
* of these parameters
*/
- function getFixType($name) {
+ public function getFixType($name) {
// parse it
$property = $attr = null;
if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name);
/**
* Defines all fixes the module will perform in a compact
* associative array of fix name to fix implementation.
- * @abstract
*/
- function makeFixes() {}
+ public function makeFixes() {}
}
<?php
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-
-class HTMLPurifier_HTMLModule_Tidy_Proprietary extends
- HTMLPurifier_HTMLModule_Tidy
+class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy
{
- var $name = 'Tidy_Proprietary';
- var $defaultLevel = 'light';
+ public $name = 'Tidy_Proprietary';
+ public $defaultLevel = 'light';
- function makeFixes() {
+ public function makeFixes() {
return array();
}
--- /dev/null
+<?php
+
+/**
+ * Tidy module that builds on the XHTMLAndHTML4 fixes and additionally
+ * switches blockquote to the 'strictblockquote' content model type.
+ */
+class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
+{
+
+    public $name = 'Tidy_Strict';
+    public $defaultLevel = 'light';
+
+    // this module answers getChildDef() itself, see below
+    public $defines_child_def = true;
+
+    /**
+     * Takes every fix produced by the parent module and adds one more
+     * that sets blockquote's content model type to 'strictblockquote'.
+     * @return Associative array of fix name to fix implementation
+     */
+    public function makeFixes() {
+        $fixes = parent::makeFixes();
+        $fixes['blockquote#content_model_type'] = 'strictblockquote';
+        return $fixes;
+    }
+
+    /**
+     * Supplies the child definition for the 'strictblockquote' content
+     * model type; any other type is handed on to the parent class.
+     * @param $def Object exposing content_model_type and content_model
+     */
+    public function getChildDef($def) {
+        if ($def->content_model_type == 'strictblockquote') {
+            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+        }
+        return parent::getChildDef($def);
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Tidy module that reuses the fixes of HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
+ * unchanged; it only overrides the module name and the default level
+ * ('heavy') under which HTMLPurifier_HTMLModule_Tidy files those fixes.
+ */
+class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
+{
+ public $name = 'Tidy_Transitional';
+ public $defaultLevel = 'heavy';
+}
+
<?php
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-require_once 'HTMLPurifier/AttrTransform/Lang.php';
-
-class HTMLPurifier_HTMLModule_Tidy_XHTML extends
- HTMLPurifier_HTMLModule_Tidy
+class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy
{
- var $name = 'Tidy_XHTML';
- var $defaultLevel = 'medium';
+ public $name = 'Tidy_XHTML';
+ public $defaultLevel = 'medium';
- function makeFixes() {
+ public function makeFixes() {
$r = array();
$r['@lang'] = new HTMLPurifier_AttrTransform_Lang();
return $r;
<?php
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-
-require_once 'HTMLPurifier/TagTransform/Simple.php';
-require_once 'HTMLPurifier/TagTransform/Font.php';
-
-require_once 'HTMLPurifier/AttrTransform/BgColor.php';
-require_once 'HTMLPurifier/AttrTransform/BoolToCSS.php';
-require_once 'HTMLPurifier/AttrTransform/Border.php';
-require_once 'HTMLPurifier/AttrTransform/Name.php';
-require_once 'HTMLPurifier/AttrTransform/Length.php';
-require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
-require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
-
-require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
-
-class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
- HTMLPurifier_HTMLModule_Tidy
+class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy
{
- function makeFixes() {
+ public function makeFixes() {
$r = array();
}
-class HTMLPurifier_HTMLModule_Tidy_Transitional extends
- HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
-{
- var $name = 'Tidy_Transitional';
- var $defaultLevel = 'heavy';
-}
-
-class HTMLPurifier_HTMLModule_Tidy_Strict extends
- HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
-{
- var $name = 'Tidy_Strict';
- var $defaultLevel = 'light';
-
- function makeFixes() {
- $r = parent::makeFixes();
- $r['blockquote#content_model_type'] = 'strictblockquote';
- return $r;
- }
-
- var $defines_child_def = true;
- function getChildDef($def) {
- if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
- return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
- }
-}
-
+++ /dev/null
-<?php
-
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
-
-class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
- HTMLPurifier_HTMLModule_Tidy
-{
-
- var $name = 'Tidy_XHTMLStrict';
- var $defaultLevel = 'light';
-
- function makeFixes() {
- $r = array();
- $r['blockquote#content_model_type'] = 'strictblockquote';
- return $r;
- }
-
- var $defines_child_def = true;
- function getChildDef($def) {
- if ($def->content_model_type != 'strictblockquote') return false;
- return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
- }
-
-}
-
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{
- var $name = 'XMLCommonAttributes';
+ public $name = 'XMLCommonAttributes';
- var $attr_collections = array(
+ public $attr_collections = array(
/* moodle comment - xml:lang breaks our multilang
'Lang' => array(
'xml:lang' => 'LanguageCode',
<?php
-require_once 'HTMLPurifier/HTMLModule.php';
-require_once 'HTMLPurifier/ElementDef.php';
-require_once 'HTMLPurifier/Doctype.php';
-require_once 'HTMLPurifier/DoctypeRegistry.php';
-
-require_once 'HTMLPurifier/ContentSets.php';
-require_once 'HTMLPurifier/AttrTypes.php';
-require_once 'HTMLPurifier/AttrCollections.php';
-
-require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/Enum.php';
-
-// W3C modules
-require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
-require_once 'HTMLPurifier/HTMLModule/Text.php';
-require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
-require_once 'HTMLPurifier/HTMLModule/List.php';
-require_once 'HTMLPurifier/HTMLModule/Presentation.php';
-require_once 'HTMLPurifier/HTMLModule/Edit.php';
-require_once 'HTMLPurifier/HTMLModule/Bdo.php';
-require_once 'HTMLPurifier/HTMLModule/Tables.php';
-require_once 'HTMLPurifier/HTMLModule/Image.php';
-require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
-require_once 'HTMLPurifier/HTMLModule/Legacy.php';
-require_once 'HTMLPurifier/HTMLModule/Target.php';
-require_once 'HTMLPurifier/HTMLModule/Scripting.php';
-require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
-require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
-require_once 'HTMLPurifier/HTMLModule/Ruby.php';
-require_once 'HTMLPurifier/HTMLModule/Object.php';
-
-// tidy modules
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Doctype', '', 'string',
- 'Doctype to use during filtering. '.
- 'Technically speaking this is not actually a doctype (as it does '.
- 'not identify a corresponding DTD), but we are using this name '.
- 'for sake of simplicity. When non-blank, this will override any older directives '.
- 'like %HTML.XHTML or %HTML.Strict.'
-);
-HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array(
- '', 'HTML 4.01 Transitional', 'HTML 4.01 Strict',
- 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict',
- 'XHTML 1.1'
-));
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'CustomDoctype', null, 'string/null',
-'
-A custom doctype for power-users who defined there own document
-type. This directive only applies when %HTML.Doctype is blank.
-This directive has been available since 2.0.1.
-'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Trusted', false, 'bool',
- 'Indicates whether or not the user input is trusted or not. If the '.
- 'input is trusted, a more expansive set of allowed tags and attributes '.
- 'will be used. This directive has been available since 2.0.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedModules', null, 'lookup/null', '
-<p>
- A doctype comes with a set of usual modules to use. Without having
- to mucking about with the doctypes, you can quickly activate or
- disable these modules by specifying which modules you wish to allow
- with this directive. This is most useful for unit testing specific
- modules, although end users may find it useful for their own ends.
-</p>
-<p>
- If you specify a module that does not exist, the manager will silently
- fail to use it, so be careful! User-defined modules are not affected
- by this directive. Modules defined in %HTML.CoreModules are not
- affected by this directive. This directive has been available since 2.0.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'CoreModules', array(
- 'Structure' => true,
- 'Text' => true,
- 'Hypertext' => true,
- 'List' => true,
- 'NonXMLCommonAttributes' => true,
- 'XMLCommonAttributes' => true,
- 'CommonAttributes' => true
- ), 'lookup', '
-<p>
- Certain modularized doctypes (XHTML, namely), have certain modules
- that must be included for the doctype to be an conforming document
- type: put those modules here. By default, XHTML\'s core modules
- are used. You can set this to a blank array to disable core module
- protection, but this is not recommended. This directive has been
- available since 2.0.0.
-</p>
-');
-
class HTMLPurifier_HTMLModuleManager
{
/**
* Instance of HTMLPurifier_DoctypeRegistry
- * @public
*/
- var $doctypes;
+ public $doctypes;
/**
* Instance of current doctype
- * @public
*/
- var $doctype;
+ public $doctype;
/**
* Instance of HTMLPurifier_AttrTypes
- * @public
*/
- var $attrTypes;
+ public $attrTypes;
/**
* Active instances of modules for the specified doctype are
* indexed, by name, in this array.
*/
- var $modules = array();
+ public $modules = array();
/**
* Array of recognized HTMLPurifier_Module instances, indexed by
* module's class name. This array is usually lazy loaded, but a
* user can overload a module by pre-emptively registering it.
*/
- var $registeredModules = array();
+ public $registeredModules = array();
/**
* List of extra modules that were added by the user using addModule().
* These get unconditionally merged into the current doctype, whatever
* it may be.
*/
- var $userModules = array();
+ public $userModules = array();
/**
* Associative array of element name to list of modules that have
* definitions for the element; this array is dynamically filled.
*/
- var $elementLookup = array();
+ public $elementLookup = array();
/** List of prefixes we should use for registering small names */
- var $prefixes = array('HTMLPurifier_HTMLModule_');
+ public $prefixes = array('HTMLPurifier_HTMLModule_');
- var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
- var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
+ public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
+ public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
/** If set to true, unsafe elements and attributes will be allowed */
- var $trusted = false;
+ public $trusted = false;
- function HTMLPurifier_HTMLModuleManager() {
+ public function __construct() {
// editable internal objects
$this->attrTypes = new HTMLPurifier_AttrTypes();
$this->doctypes = new HTMLPurifier_DoctypeRegistry();
- // setup default HTML doctypes
-
- // module reuse
+ // setup basic modules
$common = array(
'CommonAttributes', 'Text', 'Hypertext', 'List',
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
$xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes');
+ // setup basic doctypes
$this->doctypes->register(
'HTML 4.01 Transitional', false,
array_merge($common, $transitional, $non_xml),
* @param $module Mixed: string module name, with or without
* HTMLPurifier_HTMLModule prefix, or instance of
* subclass of HTMLPurifier_HTMLModule.
+ * @param $overload Boolean whether or not to overload previous modules.
+ * If this is not set, and you do overload a module,
+ * HTML Purifier will complain with a warning.
* @note This function will not call autoload, you must instantiate
* (and thus invoke) autoload outside the method.
* @note If a string is passed as a module name, different variants
* If your object name collides with an internal class, specify
* your module manually. All modules must have been included
* externally: registerModule will not perform inclusions for you!
- * @warning If your module has the same name as an already loaded
- * module, your module will overload the old one WITHOUT
- * warning.
*/
- function registerModule($module) {
+ public function registerModule($module, $overload = false) {
if (is_string($module)) {
// attempt to load the module
$original_module = $module;
$ok = false;
foreach ($this->prefixes as $prefix) {
$module = $prefix . $original_module;
- if ($this->_classExists($module)) {
+ if (class_exists($module)) {
$ok = true;
break;
}
}
if (!$ok) {
$module = $original_module;
- if (!$this->_classExists($module)) {
+ if (!class_exists($module)) {
trigger_error($original_module . ' module does not exist',
E_USER_ERROR);
return;
trigger_error('Module instance of ' . get_class($module) . ' must have name');
return;
}
- $this->registeredModules[$module->name] = $module;
- }
-
- /**
- * Safely tests for class existence without invoking __autoload in PHP5
- * or greater.
- * @param $name String class name to test
- * @note If any other class needs it, we'll need to stash in a
- * conjectured "compatibility" class
- * @private
- */
- function _classExists($name) {
- static $is_php_4 = null;
- if ($is_php_4 === null) {
- $is_php_4 = version_compare(PHP_VERSION, '5', '<');
- }
- if ($is_php_4) {
- return class_exists($name);
- } else {
- return class_exists($name, false);
+ if (!$overload && isset($this->registeredModules[$module->name])) {
+ trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
}
+ $this->registeredModules[$module->name] = $module;
}
/**
* Adds a module to the current doctype by first registering it,
* and then tacking it on to the active doctype
*/
- function addModule($module) {
+ public function addModule($module) {
$this->registerModule($module);
if (is_object($module)) $module = $module->name;
$this->userModules[] = $module;
* Adds a class prefix that registerModule() will use to resolve a
* string name to a concrete class
*/
- function addPrefix($prefix) {
+ public function addPrefix($prefix) {
$this->prefixes[] = $prefix;
}
* use getElement() and getElements()
* @param $config Instance of HTMLPurifier_Config
*/
- function setup($config) {
+ public function setup($config) {
$this->trusted = $config->get('HTML', 'Trusted');
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
+ // add proprietary module (this gets special treatment because
+ // it is completely removed from doctypes, etc.)
+ if ($config->get('HTML', 'Proprietary')) {
+ $modules[] = 'Proprietary';
+ }
+
foreach ($modules as $module) {
$this->processModule($module);
}
* Takes a module and adds it to the active module collection,
* registering it if necessary.
*/
- function processModule($module) {
+ public function processModule($module) {
if (!isset($this->registeredModules[$module]) || is_object($module)) {
$this->registerModule($module);
}
* Retrieves merged element definitions.
* @return Array of HTMLPurifier_ElementDef
*/
- function getElements() {
+ public function getElements() {
$elements = array();
foreach ($this->modules as $module) {
+ if (!$this->trusted && !$module->safe) continue;
foreach ($module->info as $name => $v) {
if (isset($elements[$name])) continue;
- // if element is not safe, don't use it
- if (!$this->trusted && ($v->safe === false)) continue;
$elements[$name] = $this->getElement($name);
}
}
* @param $trusted Boolean trusted overriding parameter: set to true
* if you want the full version of an element
* @return Merged HTMLPurifier_ElementDef
+ * @note Modules are iterated over twice: getElements() walks the active
+ * modules to collect element names, and this method walks the modules
+ * registered in elementLookup for $name to merge their definitions.
*/
- function getElement($name, $trusted = null) {
-
- $def = false;
- if ($trusted === null) $trusted = $this->trusted;
-
- $modules = $this->modules;
+ public function getElement($name, $trusted = null) {
if (!isset($this->elementLookup[$name])) {
return false;
}
+ // setup global state variables
+ $def = false;
+ if ($trusted === null) $trusted = $this->trusted;
+
+ // iterate through each module that has registered itself to this
+ // element
foreach($this->elementLookup[$name] as $module_name) {
- $module = $modules[$module_name];
-
- // copy is used because, ideally speaking, the original
- // definition should not be modified. Usually, this will
- // make no difference, but for consistency's sake
- $new_def = $module->info[$name]->copy();
+ $module = $this->modules[$module_name];
- // refuse to create/merge in a definition that is deemed unsafe
- if (!$trusted && ($new_def->safe === false)) {
- $def = false;
+ // refuse to create/merge from a module that is deemed unsafe--
+ // pretend the module doesn't exist--when trusted mode is not on.
+ if (!$trusted && !$module->safe) {
continue;
}
+ // clone is used because, ideally speaking, the original
+ // definition should not be modified. Usually, this will
+ // make no difference, but for consistency's sake
+ $new_def = clone $module->info[$name];
+
if (!$def && $new_def->standalone) {
- // element with unknown safety is not to be trusted.
- // however, a merge-in definition with undefined safety
- // is fine
- if (!$trusted && !$new_def->safe) continue;
$def = $new_def;
} elseif ($def) {
+ // This will occur even if $new_def is standalone. In practice,
+ // this will usually result in a full replacement.
$def->mergeIn($new_def);
} else {
- // could "save it for another day":
+ // :TODO:
// non-standalone definitions that don't have a standalone
// to merge into could be deferred to the end
continue;
<?php
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'IDBlacklist', array(), 'list',
- 'Array of IDs not allowed in the document.'
-);
-
/**
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
* @note In Slashdot-speak, dupe means duplicate.
* Lookup table of IDs we've accumulated.
* @public
*/
- var $ids = array();
+ public $ids = array();
/**
* Builds an IDAccumulator, also initializing the default blacklist
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Fully initialized HTMLPurifier_IDAccumulator
- * @static
*/
- function build($config, &$context) {
- $acc = new HTMLPurifier_IDAccumulator();
- $acc->load($config->get('Attr', 'IDBlacklist'));
- return $acc;
+ public static function build($config, $context) {
+ $id_accumulator = new HTMLPurifier_IDAccumulator();
+ $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
+ return $id_accumulator;
}
/**
* @param $id ID to be added.
* @return Bool status, true if success, false if there's a dupe
*/
- function add($id) {
+ public function add($id) {
if (isset($this->ids[$id])) return false;
return $this->ids[$id] = true;
}
* @param $array_of_ids Array of IDs to load
* @note This function doesn't care about duplicates
*/
- function load($array_of_ids) {
+ public function load($array_of_ids) {
foreach ($array_of_ids as $id) {
$this->ids[$id] = true;
}
* @todo Allow injectors to request a re-run on their output. This
* would help if an operation is recursive.
*/
-class HTMLPurifier_Injector
+abstract class HTMLPurifier_Injector
{
/**
* Advisory name of injector, this is for friendly error messages
*/
- var $name;
+ public $name;
/**
* Amount of tokens the injector needs to skip + 1. Because
* the decrement is the first thing that happens, this needs to
* be one greater than the "real" skip count.
*/
- var $skip = 1;
+ public $skip = 1;
/**
* Instance of HTMLPurifier_HTMLDefinition
*/
- var $htmlDefinition;
+ protected $htmlDefinition;
/**
* Reference to CurrentNesting variable in Context. This is an array
* list of tokens that we are currently "inside"
*/
- var $currentNesting;
+ protected $currentNesting;
/**
* Reference to InputTokens variable in Context. This is an array
* list of the input tokens that are being processed.
*/
- var $inputTokens;
+ protected $inputTokens;
/**
* Reference to InputIndex variable in Context. This is an integer
* array index for $this->inputTokens that indicates what token
* is currently being processed.
*/
- var $inputIndex;
+ protected $inputIndex;
/**
* Array of elements and attributes this injector creates and therefore
* need to be allowed by the definition. Takes form of
* array('element' => array('attr', 'attr2'), 'element2')
*/
- var $needed = array();
+ public $needed = array();
/**
* Prepares the injector by giving it the config and context objects:
* @param $context Instance of HTMLPurifier_Context
* @return Boolean false if success, string of missing needed element/attribute if failure
*/
- function prepare($config, &$context) {
+ public function prepare($config, $context) {
$this->htmlDefinition = $config->getHTMLDefinition();
// perform $needed checks
foreach ($this->needed as $element => $attributes) {
* @param $name Name of element to test for
* @return True if element is allowed, false if it is not
*/
- function allowsElement($name) {
+ public function allowsElement($name) {
if (!empty($this->currentNesting)) {
$parent_token = array_pop($this->currentNesting);
$this->currentNesting[] = $parent_token;
/**
* Handler that is called when a text token is processed
*/
- function handleText(&$token) {}
+ public function handleText(&$token) {}
/**
* Handler that is called when a start or empty token is processed
*/
- function handleElement(&$token) {}
+ public function handleElement(&$token) {}
/**
* Notifier that is called when an end token is processed
* @note This differs from handlers in that the token is read-only
*/
- function notifyEnd($token) {}
+ public function notifyEnd($token) {}
}
<?php
-require_once 'HTMLPurifier/Injector.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'AutoFormat', 'AutoParagraph', false, 'bool', '
-<p>
- This directive turns on auto-paragraphing, where double newlines are
- converted in to paragraphs whenever possible. Auto-paragraphing:
-</p>
-<ul>
- <li>Always applies to inline elements or text in the root node,</li>
- <li>Applies to inline elements or text with double newlines in nodes
- that allow paragraph tags,</li>
- <li>Applies to double newlines in paragraph tags</li>
-</ul>
-<p>
- <code>p</code> tags must be allowed for this directive to take effect.
- We do not use <code>br</code> tags for paragraphing, as that is
- semantically incorrect.
-</p>
-<p>
- To prevent auto-paragraphing as a content-producer, refrain from using
- double-newlines except to specify a new paragraph or in contexts where
- it has special meaning (whitespace usually has no meaning except in
- tags like <code>pre</code>, so this should not be difficult.) To prevent
- the paragraphing of inline text adjacent to block elements, wrap them
- in <code>div</code> tags (the behavior is slightly different outside of
- the root node.)
-</p>
-<p>
- This directive has been available since 2.0.1.
-</p>
-');
-
/**
* Injector that auto paragraphs text in the root node based on
* double-spacing.
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
- var $name = 'AutoParagraph';
- var $needed = array('p');
+ public $name = 'AutoParagraph';
+ public $needed = array('p');
- function _pStart() {
+ private function _pStart() {
$par = new HTMLPurifier_Token_Start('p');
$par->armor['MakeWellFormed_TagClosedError'] = true;
return $par;
}
- function handleText(&$token) {
+ public function handleText(&$token) {
$text = $token->data;
if (empty($this->currentNesting)) {
if (!$this->allowsElement('p')) return;
// a double newline in them
$nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
- if ($this->inputTokens[$i]->type == 'start'){
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start){
if (!$this->_isInline($this->inputTokens[$i])) {
// we haven't found a double-newline, and
// we've hit a block element, so don't paragraph
}
$nesting++;
}
- if ($this->inputTokens[$i]->type == 'end') {
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) {
if ($nesting <= 0) break;
$nesting--;
}
- if ($this->inputTokens[$i]->type == 'text') {
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
// found it!
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
}
- function handleElement(&$token) {
+ public function handleElement(&$token) {
// check if we're inside a tag already
if (!empty($this->currentNesting)) {
if ($this->allowsElement('p')) {
// check if this token is adjacent to the parent token
$prev = $this->inputTokens[$this->inputIndex - 1];
- if ($prev->type != 'start') {
+ if (!$prev instanceof HTMLPurifier_Token_Start) {
// not adjacent, we can abort early
// add lead paragraph tag if our token is inline
// and the previous tag was an end paragraph
if (
- $prev->name == 'p' && $prev->type == 'end' &&
+ $prev->name == 'p' && $prev instanceof HTMLPurifier_Token_End &&
$this->_isInline($token)
) {
$token = array($this->_pStart(), $token);
// early if possible
$j = 1; // current nesting, one is due to parent (we recalculate current token)
for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
- if ($this->inputTokens[$i]->type == 'start') $j++;
- if ($this->inputTokens[$i]->type == 'end') $j--;
- if ($this->inputTokens[$i]->type == 'text') {
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start) $j++;
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) $j--;
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
break;
* tags will be appended onto
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
- * @private
*/
- function _splitText($data, &$result) {
+ private function _splitText($data, &$result) {
$raw_paragraphs = explode("\n\n", $data);
// remove empty paragraphs
if (!$needs_end) {
// Start of the checks one after the current token's index
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
- if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') {
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start || $this->inputTokens[$i] instanceof HTMLPurifier_Token_Empty) {
$remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
}
// check if we can abort early (whitespace means we carry-on!)
- if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break;
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text && !$this->inputTokens[$i]->is_whitespace) break;
// end tags will automatically be handled by MakeWellFormed,
// so we don't have to worry about them
- if ($this->inputTokens[$i]->type == 'end') break;
+ if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) break;
}
} else {
$remove_paragraph_end = false;
/**
* Returns true if passed token is inline (and, ergo, allowed in
* paragraph tags)
- * @private
*/
- function _isInline($token) {
+ private function _isInline($token) {
return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
}
<?php
-require_once 'HTMLPurifier/Injector.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'AutoFormat', 'Linkify', false, 'bool', '
-<p>
- This directive turns on linkification, auto-linking http, ftp and
- https URLs. <code>a</code> tags with the <code>href</code> attribute
- must be allowed. This directive has been available since 2.0.1.
-</p>
-');
-
/**
* Injector that converts http, https and ftp text URLs to actual links.
*/
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
- var $name = 'Linkify';
- var $needed = array('a' => array('href'));
+ public $name = 'Linkify';
+ public $needed = array('a' => array('href'));
- function handleText(&$token) {
+ public function handleText(&$token) {
if (!$this->allowsElement('a')) return;
if (strpos($token->data, '://') === false) {
<?php
-require_once 'HTMLPurifier/Injector.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'AutoFormat', 'PurifierLinkify', false, 'bool', '
-<p>
- Internal auto-formatter that converts configuration directives in
- syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags
- with the <code>href</code> attribute must be allowed.
- This directive has been available since 2.0.1.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'AutoFormatParam', 'PurifierLinkifyDocURL', '#%s', 'string', '
-<p>
- Location of configuration documentation to link to, let %s substitute
- into the configuration\'s namespace and directive names sans the percent
- sign. This directive has been available since 2.0.1.
-</p>
-');
-
/**
* Injector that converts configuration directive syntax %Namespace.Directive
* to links
class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
{
- var $name = 'PurifierLinkify';
- var $docURL;
- var $needed = array('a' => array('href'));
+ public $name = 'PurifierLinkify';
+ public $docURL;
+ public $needed = array('a' => array('href'));
- function prepare($config, &$context) {
+ public function prepare($config, $context) {
$this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL');
return parent::prepare($config, $context);
}
- function handleText(&$token) {
+ public function handleText(&$token) {
if (!$this->allowsElement('a')) return;
if (strpos($token->data, '%') === false) return;
<?php
-require_once 'HTMLPurifier/LanguageFactory.php';
-
+/**
+ * Represents a language and defines localizable string formatting and
+ * other functions, as well as the localized messages for HTML Purifier.
+ */
class HTMLPurifier_Language
{
/**
* ISO 639 language code of language. Prefers shortest possible version
*/
- var $code = 'en';
+ public $code = 'en';
/**
* Fallback language code
*/
- var $fallback = false;
+ public $fallback = false;
/**
* Array of localizable messages
*/
- var $messages = array();
+ public $messages = array();
/**
* Array of localizable error codes
*/
- var $errorNames = array();
+ public $errorNames = array();
/**
* True if no message file was found for this language, so English
* is being used instead. Check this if you'd like to notify the
* user that they've used a non-supported language.
*/
- var $error = false;
+ public $error = false;
/**
* Has the language object been loaded yet?
- * @private
+ * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
*/
- var $_loaded = false;
+ public $_loaded = false;
/**
* Instances of HTMLPurifier_Config and HTMLPurifier_Context
*/
- var $config, $context;
+ protected $config, $context;
- function HTMLPurifier_Language($config, &$context) {
+ public function __construct($config, $context) {
$this->config = $config;
- $this->context =& $context;
+ $this->context = $context;
}
/**
* Loads language object with necessary info from factory cache
* @note This is a lazy loader
*/
- function load() {
+ public function load() {
if ($this->_loaded) return;
$factory = HTMLPurifier_LanguageFactory::instance();
$factory->loadLanguage($this->code);
* @param $key string identifier of message
* @return string localised message
*/
- function getMessage($key) {
+ public function getMessage($key) {
if (!$this->_loaded) $this->load();
if (!isset($this->messages[$key])) return "[$key]";
return $this->messages[$key];
* reporting
* @return string localised message
*/
- function getErrorName($int) {
+ public function getErrorName($int) {
if (!$this->_loaded) $this->load();
if (!isset($this->errorNames[$int])) return "[Error: $int]";
return $this->errorNames[$int];
/**
* Converts an array list into a string readable representation
*/
- function listify($array) {
+ public function listify($array) {
$sep = $this->getMessage('Item separator');
$sep_last = $this->getMessage('Item separator last');
$ret = '';
* @todo Implement conditionals? Right now, some messages make
* reference to line numbers, but those aren't always available
*/
- function formatMessage($key, $args = array()) {
+ public function formatMessage($key, $args = array()) {
if (!$this->_loaded) $this->load();
if (!isset($this->messages[$key])) return "[$key]";
$raw = $this->messages[$key];
$generator = false;
foreach ($args as $i => $value) {
if (is_object($value)) {
- if (is_a($value, 'HTMLPurifier_Token')) {
+ if ($value instanceof HTMLPurifier_Token) {
// factor this out some time
if (!$generator) $generator = $this->context->get('Generator');
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
- $stripped_token = $value->copy();
+ $stripped_token = clone $value;
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
}
<?php
-require_once 'HTMLPurifier/Language.php';
-require_once 'HTMLPurifier/AttrDef/Lang.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'Language', 'en', 'string', '
-ISO 639 language code for localizable things in HTML Purifier to use,
-which is mainly error reporting. There is currently only an English (en)
-translation, so this directive is currently useless.
-This directive has been available since 2.0.0.
-');
-
/**
* Class responsible for generating HTMLPurifier_Language objects, managing
* caching and fallbacks.
* Structure is: $factory->cache[$language_code][$key] = $value
* @value array map
*/
- var $cache;
+ public $cache;
/**
* Valid keys in the HTMLPurifier_Language object. Designates which
* variables to slurp out of a message file.
* @value array list
*/
- var $keys = array('fallback', 'messages', 'errorNames');
+ public $keys = array('fallback', 'messages', 'errorNames');
/**
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes
* @value object HTMLPurifier_AttrDef_Lang
*/
- var $validator;
+ protected $validator;
/**
* Cached copy of dirname(__FILE__), directory of current file without
* trailing slash
* @value string filename
*/
- var $dir;
+ protected $dir;
/**
* Keys whose contents are a hash map and can be merged
* @value array lookup
*/
- var $mergeable_keys_map = array('messages' => true, 'errorNames' => true);
+ protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);
/**
* Keys whose contents are a list and can be merged
* @value array lookup
*/
- var $mergeable_keys_list = array();
+ protected $mergeable_keys_list = array();
/**
* Retrieve sole instance of the factory.
- * @static
* @param $prototype Optional prototype to overload sole instance with,
* or bool true to reset to default factory.
*/
- function &instance($prototype = null) {
+ public static function instance($prototype = null) {
static $instance = null;
if ($prototype !== null) {
$instance = $prototype;
* Sets up the singleton, much like a constructor
* @note Prevents people from getting this outside of the singleton
*/
- function setup() {
+ public function setup() {
$this->validator = new HTMLPurifier_AttrDef_Lang();
$this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
}
* @param $context Instance of HTMLPurifier_Context
* @param $code Code to override configuration with. Private parameter.
*/
- function create($config, &$context, $code = false) {
+ public function create($config, $context, $code = false) {
// validate language code
if ($code === false) {
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
- if (file_exists($file)) {
- include $file;
+ if (file_exists($file) || class_exists($class, false)) {
$lang = new $class($config, $context);
} else {
// Go fallback
$depth--;
}
}
+
$lang->code = $code;
return $lang;
* @note Loads the original language into cache
* @param $code string language code
*/
- function getFallbackFor($code) {
+ public function getFallbackFor($code) {
$this->loadLanguage($code);
return $this->cache[$code]['fallback'];
}
* Loads language into the cache, handles message file and fallbacks
* @param $code string language code
*/
- function loadLanguage($code) {
+ public function loadLanguage($code) {
static $languages_seen = array(); // recursion guard
// abort if we've already loaded it
<?php
-require_once 'HTMLPurifier/Token.php';
-require_once 'HTMLPurifier/Encoder.php';
-require_once 'HTMLPurifier/EntityParser.php';
-
-// implementations
-require_once 'HTMLPurifier/Lexer/DirectLex.php';
-if (version_compare(PHP_VERSION, "5", ">=")) {
- // You can remove the if statement if you are running PHP 5 only.
- // We ought to get the strict version to follow those rules.
- require_once 'HTMLPurifier/Lexer/DOMLex.php';
-}
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'ConvertDocumentToFragment', true, 'bool', '
-This parameter determines whether or not the filter should convert
-input that is a full document with html and body tags to a fragment
-of just the contents of a body tag. This parameter is simply something
-HTML Purifier can do during an edge-case: for most inputs, this
-processing is not necessary.
-');
-HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'LexerImpl', null, 'mixed/null', '
-<p>
- This parameter determines what lexer implementation can be used. The
- valid values are:
-</p>
-<dl>
- <dt><em>null</em></dt>
- <dd>
- Recommended, the lexer implementation will be auto-detected based on
- your PHP-version and configuration.
- </dd>
- <dt><em>string</em> lexer identifier</dt>
- <dd>
- This is a slim way of manually overridding the implementation.
- Currently recognized values are: DOMLex (the default PHP5 implementation)
- and DirectLex (the default PHP4 implementation). Only use this if
- you know what you are doing: usually, the auto-detection will
- manage things for cases you aren\'t even aware of.
- </dd>
- <dt><em>object</em> lexer instance</dt>
- <dd>
- Super-advanced: you can specify your own, custom, implementation that
- implements the interface defined by <code>HTMLPurifier_Lexer</code>.
- I may remove this option simply because I don\'t expect anyone
- to use it.
- </dd>
-</dl>
-<p>
- This directive has been available since 2.0.0.
-</p>
-'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'MaintainLineNumbers', null, 'bool/null', '
-<p>
- If true, HTML Purifier will add line number information to all tokens.
- This is useful when error reporting is turned on, but can result in
- significant performance degradation and should not be used when
- unnecessary. This directive must be used with the DirectLex lexer,
- as the DOMLex lexer does not (yet) support this functionality.
- If the value is null, an appropriate value will be selected based
- on other configuration. This directive has been available since 2.0.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'AggressivelyFixLt', false, 'bool', '
-This directive enables aggressive pre-filter fixes HTML Purifier can
-perform in order to ensure that open angled-brackets do not get killed
-during parsing stage. Enabling this will result in two preg_replace_callback
-calls and one preg_replace call for every bit of HTML passed through here.
-It is not necessary and will have no effect for PHP 4.
-This directive has been available since 2.1.0.
-');
-
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
*
* A lexer is HTML-oriented: it might work with XML, but it's not
* recommended, as we adhere to a subset of the specification for optimization
- * reasons.
+ * reasons. This might change in the future. Also, most tokenizers are not
+ * expected to handle DTDs or PIs.
*
* This class should not be directly instantiated, but you may use create() to
* retrieve a default copy of the lexer. Being a supertype, this class
*
* @note The unit tests will instantiate this class for testing purposes, as
* many of the utility functions require a class to be instantiated.
- * Be careful when porting this class to PHP 5.
+ * This means that, even though this class is not runnable, it will
+ * not be declared abstract.
*
* @par
*
* We use tokens rather than create a DOM representation because DOM would:
*
* @par
- * -# Require more processing power to create,
- * -# Require recursion to iterate,
- * -# Must be compatible with PHP 5's DOM (otherwise duplication),
- * -# Has the entire document structure (html and body not needed), and
- * -# Has unknown readability improvement.
+ * -# Require more processing and memory to create,
+ * -# Not be streamable, and
+ * -# Include the entire document structure (html and body not needed).
*
* @par
- * What the last item means is that the functions for manipulating tokens are
- * already fairly compact, and when well-commented, more abstraction may not
- * be needed.
- *
- * @see HTMLPurifier_Token
+ * However, DOM is helpful in that it makes it easy to move around nodes
+ * without a lot of lookaheads to see when a tag is closed. This is a
+ * limitation of the token system and some workarounds would be nice.
*/
class HTMLPurifier_Lexer
{
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
- * Depending on what PHP version you are running, the abstract base
- * Lexer class will determine which concrete Lexer is best for you:
- * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
- * for PHP 5 and beyond. This general rule has a few exceptions to it
- * involving special features that only DirectLex implements.
- *
- * @static
+ * By default HTMLPurifier_Lexer_DOMLex will be returned. There are
+ * a few exceptions involving special features that only DirectLex
+ * implements.
*
* @note The behavior of this class has changed, rather than accepting
* a prototype object, it now accepts a configuration object.
* To specify your own prototype, set %Core.LexerImpl to it.
* This change in behavior de-singletonizes the lexer object.
*
- * @note In PHP4, it is possible to call this factory method from
- * subclasses, such usage is not recommended and not
- * forwards-compatible.
- *
- * @param $prototype Optional prototype lexer or configuration object
+ * @param $config Instance of HTMLPurifier_Config
* @return Concrete lexer.
*/
- function create($config) {
+ public static function create($config) {
- if (!is_a($config, 'HTMLPurifier_Config')) {
+ if (!($config instanceof HTMLPurifier_Config)) {
$lexer = $config;
trigger_error("Passing a prototype to
HTMLPurifier_Lexer::create() is deprecated, please instead
break;
}
- if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
- class_exists('DOMDocument')) { // check for DOM support
+ if (class_exists('DOMDocument')) {
+ // check for DOM support, because, surprisingly enough,
+ // it's *not* part of the core!
$lexer = 'DOMLex';
} else {
$lexer = 'DirectLex';
case 'DirectLex':
return new HTMLPurifier_Lexer_DirectLex();
case 'PH5P':
- // experimental Lexer that must be manually included
return new HTMLPurifier_Lexer_PH5P();
default:
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
// -- CONVENIENCE MEMBERS ---------------------------------------------
- function HTMLPurifier_Lexer() {
+ public function __construct() {
$this->_entity_parser = new HTMLPurifier_EntityParser();
}
/**
* Most common entity to raw value conversion table for special entities.
- * @protected
*/
- var $_special_entity2str =
+ protected $_special_entity2str =
array(
'"' => '"',
'&' => '&',
* @param $string String character data to be parsed.
* @returns Parsed character data.
*/
- function parseData($string) {
+ public function parseData($string) {
// following functions require at least one character
if ($string === '') return '';
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
- function tokenizeHTML($string, $config, &$context) {
+ public function tokenizeHTML($string, $config, $context) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
/**
* Translates CDATA sections into regular sections (through escaping).
*
- * @static
- * @protected
* @param $string HTML string to process.
* @returns HTML with CDATA sections escaped.
*/
- function escapeCDATA($string) {
+ protected static function escapeCDATA($string) {
return preg_replace_callback(
'/<!\[CDATA\[(.+?)\]\]>/s',
array('HTMLPurifier_Lexer', 'CDATACallback'),
}
/**
- * Special CDATA case that is especiall convoluted for <script>
+ * Special CDATA case that is especially convoluted for <script>
*/
- function escapeCommentedCDATA($string) {
+ protected static function escapeCommentedCDATA($string) {
return preg_replace_callback(
'#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
array('HTMLPurifier_Lexer', 'CDATACallback'),
/**
* Callback function for escapeCDATA() that does the work.
*
- * @static
* @warning Though this is public in order to let the callback happen,
* calling it directly is not recommended.
* @param $matches PCRE matches array, with index 0 the entire match
* and 1 the inside of the CDATA section.
* @returns Escaped internals of the CDATA section.
*/
- function CDATACallback($matches) {
+ protected static function CDATACallback($matches) {
// not exactly sure why the character set is needed, but whatever
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
}
/**
* Takes a piece of HTML and normalizes it by converting entities, fixing
* encoding, extracting bits, and other good stuff.
+ * @todo Consider making protected
*/
- function normalize($html, $config, &$context) {
+ public function normalize($html, $config, $context) {
// extract body from document if applicable
if ($config->get('Core', 'ConvertDocumentToFragment')) {
/**
* Takes a string of HTML (fragment or document) and returns the content
+ * @todo Consider making protected
*/
- function extractBody($html) {
+ public function extractBody($html) {
$matches = array();
$result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
if ($result) {
<?php
-require_once 'HTMLPurifier/Lexer.php';
-require_once 'HTMLPurifier/TokenFactory.php';
-
/**
* Parser that uses PHP 5's DOM extension (part of the core).
*
public function __construct() {
// setup the factory
- parent::HTMLPurifier_Lexer();
+ parent::__construct();
$this->factory = new HTMLPurifier_TokenFactory();
}
- public function tokenizeHTML($html, $config, &$context) {
+ public function tokenizeHTML($html, $config, $context) {
$html = $this->normalize($html, $config, $context);
if ($config->get('Core', 'AggressivelyFixLt')) {
$char = '[^a-z!\/]';
$comment = "/<!--(.*?)(-->|\z)/is";
- $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
+ $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
$html = preg_replace("/<($char)/i", '<\\1', $html);
- $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
+ $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
}
// preprocess html, essential for UTF-8
$last = end($tokens);
$data = $node->data;
// (note $node->tagname is already normalized)
- if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
+ if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
$new_data = trim($data);
if (substr($new_data, 0, 4) === '<!--') {
$data = substr($new_data, 4);
* Callback function for undoing escaping of stray angled brackets
* in comments
*/
- function callbackUndoCommentSubst($matches) {
+ public function callbackUndoCommentSubst($matches) {
return '<!--' . strtr($matches[1], array('&'=>'&','<'=>'<')) . $matches[2];
}
* Callback function that entity-izes ampersands in comments so that
* callbackUndoCommentSubst doesn't clobber them
*/
- function callbackArmorCommentEntities($matches) {
+ public function callbackArmorCommentEntities($matches) {
return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2];
}
/**
* Wraps an HTML fragment in the necessary HTML
*/
- function wrapHTML($html, $config, &$context) {
+ protected function wrapHTML($html, $config, $context) {
$def = $config->getDefinition('HTML');
$ret = '';
$ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
+ // No protection if $html contains a stray </div>!
$ret .= '</head><body><div>'.$html.'</div></body></html>';
return $ret;
}
<?php
-require_once 'HTMLPurifier/Lexer.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'DirectLexLineNumberSyncInterval', 0, 'int', '
-<p>
- Specifies the number of tokens the DirectLex line number tracking
- implementations should process before attempting to resyncronize the
- current line count by manually counting all previous new-lines. When
- at 0, this functionality is disabled. Lower values will decrease
- performance, and this is only strictly necessary if the counting
- algorithm is buggy (in which case you should report it as a bug).
- This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is
- not being used. This directive has been available since 2.0.0.
-</p>
-');
-
/**
* Our in-house implementation of a parser.
*
/**
* Whitespace characters for str(c)spn.
- * @protected
*/
- var $_whitespace = "\x20\x09\x0D\x0A";
+ protected $_whitespace = "\x20\x09\x0D\x0A";
/**
* Callback function for script CDATA fudge
* @param $matches, in form of array(opening tag, contents, closing tag)
- * @static
*/
- function scriptCallback($matches) {
+ protected function scriptCallback($matches) {
return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3];
}
- function tokenizeHTML($html, $config, &$context) {
+ public function tokenizeHTML($html, $config, $context) {
// special normalization for script tags without any armor
// our "armor" heuristic is a < sign any number of whitespaces after
// the first script tag
if ($config->get('HTML', 'Trusted')) {
$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
- array('HTMLPurifier_Lexer_DirectLex', 'scriptCallback'), $html);
+ array($this, 'scriptCallback'), $html);
}
$html = $this->normalize($html, $config, $context);
$e =& $context->get('ErrorCollector');
}
- // infinite loop protection
- // has to be pretty big, since html docs can be big
- // we're allow two hundred thousand tags... more than enough?
- // NOTE: this is also used for synchronization, so watch out
+ // for testing synchronization
$loops = 0;
- while(true) {
-
- // infinite loop protection
- if (++$loops > 200000) return array();
+ while(++$loops) {
// recalculate lines
if (
// Check if it's a comment
if (
- strncmp('!--', $segment, 3) === 0
+ substr($segment, 0, 3) === '!--'
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
}
$strlen_segment = $position_comment_end - $cursor;
$segment = substr($html, $cursor, $strlen_segment);
- $token = new HTMLPurifier_Token_Comment(substr($segment, 3));
+ $token = new
+ HTMLPurifier_Token_Comment(
+ substr(
+ $segment, 3, $strlen_segment - 3
+ )
+ );
if ($maintain_line_numbers) {
$token->line = $current_line;
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
}
/**
- * PHP 4 compatible substr_count that implements offset and length
+ * PHP 5.0.x compatible substr_count that implements offset and length
*/
- function substrCount($haystack, $needle, $offset, $length) {
+ protected function substrCount($haystack, $needle, $offset, $length) {
static $oldVersion;
if ($oldVersion === null) {
$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
* @param $string Inside of tag excluding name.
* @returns Assoc array of attributes.
*/
- function parseAttributeString($string, $config, &$context) {
+ public function parseAttributeString($string, $config, $context) {
$string = (string) $string; // quick typecast
if ($string == '') return array(); // no attributes
// space, so let's guarantee that there's always a terminating space.
$string .= ' ';
- // infinite loop protection
- $loops = 0;
while(true) {
- // infinite loop protection
- if (++$loops > 1000) {
- trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
- return array();
- }
-
if ($cursor >= $size) {
break;
}
<?php
-require_once 'XML/HTMLSax3.php'; // PEAR
-require_once 'HTMLPurifier/Lexer.php';
-
/**
* Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
*
/**
* Internal accumulator array for SAX parsers.
- * @protected
*/
- var $tokens = array();
+ protected $tokens = array();
- function tokenizeHTML($string, $config, &$context) {
+ public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
/**
* Open tag event handler, interface is defined by PEAR package.
*/
- function openHandler(&$parser, $name, $attrs, $closed) {
+ public function openHandler(&$parser, $name, $attrs, $closed) {
// entities are not resolved in attrs
foreach ($attrs as $key => $attr) {
$attrs[$key] = $this->parseData($attr);
/**
* Close tag event handler, interface is defined by PEAR package.
*/
- function closeHandler(&$parser, $name) {
+ public function closeHandler(&$parser, $name) {
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
- if ($this->tokens[count($this->tokens)-1]->type == 'empty') {
+ if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) {
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
/**
* Data event handler, interface is defined by PEAR package.
*/
- function dataHandler(&$parser, $data) {
+ public function dataHandler(&$parser, $data) {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
/**
* Escaped text handler, interface is defined by PEAR package.
*/
- function escapeHandler(&$parser, $data) {
+ public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
<?php\r
\r
-require_once 'HTMLPurifier/Lexer/DOMLex.php';\r
-\r
/**\r
* Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.\r
- * Requires PHP5, and occupies space in the HTML5 pseudo-namespace (may\r
- * cause conflicts, sorry).\r
+ * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.\r
+ * \r
+ * @note\r
+ * Recent changes to PHP's DOM extension have resulted in some fatal\r
+ * error conditions with the original version of PH5P. Pending changes,\r
+ * this lexer will punt to DirectLex if DOM throws an exception.\r
*/\r
\r
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {\r
\r
- public function tokenizeHTML($html, $config, &$context) {\r
- $html = $this->normalize($html, $config, $context);\r
- $html = $this->wrapHTML( $html, $config, $context);\r
- $parser = new HTML5($html);\r
- $doc = $parser->save();\r
+ public function tokenizeHTML($html, $config, $context) {\r
+ $new_html = $this->normalize($html, $config, $context);\r
+ $new_html = $this->wrapHTML($new_html, $config, $context);\r
+ try {\r
+ $parser = new HTML5($new_html);\r
+ $doc = $parser->save();\r
+ } catch (DOMException $e) {\r
+ // Uh oh, it failed. Punt to DirectLex.\r
+ $lexer = new HTMLPurifier_Lexer_DirectLex();\r
+ $context->register('PH5PError', $e); // save the error, so we can detect it\r
+ return $lexer->tokenizeHTML($html, $config, $context); // use original HTML\r
+ }\r
$tokens = array();\r
$this->tokenizeDOM(\r
$doc->getElementsByTagName('html')->item(0)-> // <html>\r
\r
public function __construct($data) {\r
$data = str_replace("\r\n", "\n", $data);\r
- $date = str_replace("\r", null, $data);\r
+ $data = str_replace("\r", null, $data);\r
\r
$this->data = $data;\r
$this->char = -1;\r
/* Reconstruct the active formatting elements, if any. */\r
$this->reconstructActiveFormattingElements();\r
\r
- $this->insertElement($token);\r
+ $this->insertElement($token, true, true);\r
break;\r
}\r
break;\r
}\r
}\r
\r
- private function insertElement($token, $append = true) {\r
+ private function insertElement($token, $append = true, $check = false) {\r
+ // Proprietary workaround for libxml2's limitations with tag names\r
+ if ($check) {\r
+ // Slightly modified HTML5 tag-name modification,\r
+ // removing anything that's not an ASCII letter, digit, or hyphen\r
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);\r
+ // Remove leading hyphens and numbers\r
+ $token['name'] = ltrim($token['name'], '-0..9');\r
+ // In theory, this should never be needed, but just in case\r
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice\r
+ }\r
+ \r
$el = $this->dom->createElement($token['name']);\r
\r
foreach($token['attr'] as $attr) {\r
/**
* Reserved characters to preserve when using encode().
*/
- var $preserve = array();
+ protected $preserve = array();
/**
* String of characters that should be preserved while using encode().
*/
- function HTMLPurifier_PercentEncoder($preserve = false) {
+ public function __construct($preserve = false) {
// unreserved letters, ought to const-ify
for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
* @param $string String to be encoded
* @return Encoded string.
*/
- function encode($string) {
+ public function encode($string) {
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
* characters. Be careful when reusing instances of PercentEncoder!
* @param $string String to normalize
*/
- function normalize($string) {
+ public function normalize($string) {
if ($string == '') return '';
$parts = explode('%', $string);
$ret = array_shift($parts);
<?php
-require_once 'HTMLPurifier/Generator.php';
-require_once 'HTMLPurifier/Token.php';
-require_once 'HTMLPurifier/Encoder.php';
-
// OUT OF DATE, NEEDS UPDATING!
+// USE XMLWRITER!
class HTMLPurifier_Printer
{
/**
* Instance of HTMLPurifier_Generator for HTML generation convenience funcs
*/
- var $generator;
+ protected $generator;
/**
* Instance of HTMLPurifier_Config, for easy access
*/
- var $config;
+ protected $config;
/**
* Initialize $generator.
*/
- function HTMLPurifier_Printer() {
+ public function __construct() {
$this->generator = new HTMLPurifier_Generator();
}
/**
* Give generator necessary configuration if possible
*/
- function prepareGenerator($config) {
+ public function prepareGenerator($config) {
// hack for smoketests/configForm.php
- if (empty($config->conf['HTML'])) return;
+ $all = $config->getAll();
+ if (empty($all['HTML'])) return;
$context = new HTMLPurifier_Context();
$this->generator->generateFromTokens(array(), $config, $context);
}
* @param $tag Tag name
* @param $attr Attribute array
*/
- function start($tag, $attr = array()) {
+ protected function start($tag, $attr = array()) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
);
* Returns an end tag
* @param $tag Tag name
*/
- function end($tag) {
+ protected function end($tag) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_End($tag)
);
* @param $attr Tag attributes
* @param $escape Bool whether or not to escape contents
*/
- function element($tag, $contents, $attr = array(), $escape = true) {
+ protected function element($tag, $contents, $attr = array(), $escape = true) {
return $this->start($tag, $attr) .
($escape ? $this->escape($contents) : $contents) .
$this->end($tag);
}
- function elementEmpty($tag, $attr = array()) {
+ protected function elementEmpty($tag, $attr = array()) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Empty($tag, $attr)
);
}
- function text($text) {
+ protected function text($text) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Text($text)
);
* @param $name Key
* @param $value Value
*/
- function row($name, $value) {
+ protected function row($name, $value) {
if (is_bool($value)) $value = $value ? 'On' : 'Off';
return
$this->start('tr') . "\n" .
* Escapes a string for HTML output.
* @param $string String to escape
*/
- function escape($string) {
+ protected function escape($string) {
$string = HTMLPurifier_Encoder::cleanUTF8($string);
$string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
return $string;
* @param $array List of strings
* @param $polite Bool whether or not to add an end before the last
*/
- function listify($array, $polite = false) {
+ protected function listify($array, $polite = false) {
if (empty($array)) return 'None';
$ret = '';
$i = count($array);
* @param $obj Object to determine class of
* @param $prefix Further prefix to remove
*/
- function getClass($obj, $sec_prefix = '') {
+ protected function getClass($obj, $sec_prefix = '') {
static $five = null;
if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
$prefix = 'HTMLPurifier_' . $sec_prefix;
}
$class .= implode(', ', $values);
break;
- case 'composite':
+ case 'css_composite':
$values = array();
foreach ($obj->defs as $def) {
$values[] = $this->getClass($def, $sec_prefix);
}
$class .= implode(', ', $values);
break;
- case 'multiple':
+ case 'css_multiple':
$class .= $this->getClass($obj->single, $sec_prefix) . ', ';
$class .= $obj->max;
break;
<?php
-require_once 'HTMLPurifier/Printer.php';
-
class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer
{
- var $def;
+ protected $def;
- function render($config) {
+ public function render($config) {
$this->def = $config->getCSSDefinition();
$ret = '';
<?php
-require_once 'HTMLPurifier/Printer.php';
-
class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
{
/**
* Printers for specific fields
- * @protected
*/
- var $fields = array();
+ protected $fields = array();
/**
* Documentation URL, can have fragment tagged on end
- * @protected
*/
- var $docURL;
+ protected $docURL;
/**
* Name of form element to stuff config in
- * @protected
*/
- var $name;
+ protected $name;
/**
* Whether or not to compress directive names, clipping them off
* after a certain amount of letters. False to disable or integer letters
* before clipping.
- * @protected
*/
- var $compress = false;
+ protected $compress = false;
/**
* @param $name Form element name for directives to be stuffed into
* @param $doc_url String documentation URL, will have fragment tagged on
* @param $compress Integer max length before compressing a directive name, set to false to turn off
*/
- function HTMLPurifier_Printer_ConfigForm(
+ public function __construct(
$name, $doc_url = null, $compress = false
) {
- parent::HTMLPurifier_Printer();
+ parent::__construct();
$this->docURL = $doc_url;
$this->name = $name;
$this->compress = $compress;
* @param $cols Integer columns of textarea, null to use default
* @param $rows Integer rows of textarea, null to use default
*/
- function setTextareaDimensions($cols = null, $rows = null) {
+ public function setTextareaDimensions($cols = null, $rows = null) {
if ($cols) $this->fields['default']->cols = $cols;
if ($rows) $this->fields['default']->rows = $rows;
}
/**
* Retrieves styling, in case it is not accessible by webserver
*/
- function getCSS() {
+ public static function getCSS() {
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
}
/**
* Retrieves JavaScript, in case it is not accessible by webserver
*/
- function getJavaScript() {
+ public static function getJavaScript() {
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
}
* @param $config Configuration object of current form state
* @param $allowed Optional namespace(s) and directives to restrict form to.
*/
- function render($config, $allowed = true, $render_controls = true) {
+ public function render($config, $allowed = true, $render_controls = true) {
$this->config = $config;
$this->prepareGenerator($config);
$ret .= $this->start('table', array('class' => 'hp-config'));
$ret .= $this->start('thead');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Directive');
- $ret .= $this->element('th', 'Value');
+ $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive'));
+ $ret .= $this->element('th', 'Value', array('class' => 'hp-value'));
$ret .= $this->end('tr');
$ret .= $this->end('thead');
foreach ($all as $ns => $directives) {
* Renders a single namespace
* @param $ns String namespace name
* @param $directive Associative array of directives to values
- * @protected
*/
- function renderNamespace($ns, $directives) {
+ protected function renderNamespace($ns, $directives) {
$ret = '';
$ret .= $this->start('tbody', array('class' => 'namespace'));
$ret .= $this->start('tr');
/**
* Printer being decorated
*/
- var $obj;
+ protected $obj;
/**
* @param $obj Printer to decorate
*/
- function HTMLPurifier_Printer_ConfigForm_NullDecorator($obj) {
- parent::HTMLPurifier_Printer();
+ public function __construct($obj) {
+ parent::__construct();
$this->obj = $obj;
}
- function render($ns, $directive, $value, $name, $config) {
+ public function render($ns, $directive, $value, $name, $config) {
$this->prepareGenerator($config);
$ret = '';
$ret .= $this->start('label', array('for' => "$name:Null_$ns.$directive"));
'id' => "$name:Null_$ns.$directive",
'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!!
);
+ if ($this->obj instanceof HTMLPurifier_Printer_ConfigForm_bool) {
+ // modify inline javascript slightly
+ $attr['onclick'] = "toggleWriteability('$name:Yes_$ns.$directive',checked);toggleWriteability('$name:No_$ns.$directive',checked)";
+ }
if ($value === null) $attr['checked'] = 'checked';
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->text(' or ');
* Swiss-army knife configuration form field printer
*/
class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
- var $cols = 18;
- var $rows = 5;
- function render($ns, $directive, $value, $name, $config) {
+ public $cols = 18;
+ public $rows = 5;
+ public function render($ns, $directive, $value, $name, $config) {
$this->prepareGenerator($config);
// this should probably be split up a little
$ret = '';
* Bool form field printer
*/
class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer {
- function render($ns, $directive, $value, $name, $config) {
+ public function render($ns, $directive, $value, $name, $config) {
$this->prepareGenerator($config);
$ret = '';
$ret .= $this->start('div', array('id' => "$name:$ns.$directive"));
'id' => "$name:Yes_$ns.$directive",
'value' => '1'
);
- if ($value) $attr['checked'] = 'checked';
+ if ($value === true) $attr['checked'] = 'checked';
+ if ($value === null) $attr['disabled'] = 'disabled';
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->start('label', array('for' => "$name:No_$ns.$directive"));
'id' => "$name:No_$ns.$directive",
'value' => '0'
);
- if (!$value) $attr['checked'] = 'checked';
+ if ($value === false) $attr['checked'] = 'checked';
+ if ($value === null) $attr['disabled'] = 'disabled';
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->end('div');
<?php
-require_once 'HTMLPurifier/Printer.php';
-
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{
/**
* Instance of HTMLPurifier_HTMLDefinition, for easy access
*/
- var $def;
+ protected $def;
- function render($config) {
+ public function render($config) {
$ret = '';
$this->config =& $config;
/**
* Renders the Doctype table
*/
- function renderDoctype() {
+ protected function renderDoctype() {
$doctype = $this->def->doctype;
$ret = '';
$ret .= $this->start('table');
/**
* Renders environment table, which is miscellaneous info
*/
- function renderEnvironment() {
+ protected function renderEnvironment() {
$def = $this->def;
$ret = '';
/**
* Renders the Content Sets table
*/
- function renderContentSets() {
+ protected function renderContentSets() {
$ret = '';
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Content Sets');
/**
* Renders the Elements ($info) table
*/
- function renderInfo() {
+ protected function renderInfo() {
$ret = '';
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Elements ($info)');
$ret .= $this->end('tr');
foreach ($this->def->info as $name => $def) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', "<$name>" . ($def->safe ? '' : ' (unsafe)'), array('class'=>'heavy' . ($def->safe ? '' : ' unsafe'), 'colspan' => 2));
+ $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Inline content');
* Renders a row describing the allowed children of an element
* @param $def HTMLPurifier_ChildDef of pertinent element
*/
- function renderChildren($def) {
+ protected function renderChildren($def) {
$context = new HTMLPurifier_Context();
$ret = '';
$ret .= $this->start('tr');
* Listifies a tag lookup table.
* @param $array Tag lookup array in form of array('tagname' => true)
*/
- function listifyTagLookup($array) {
+ protected function listifyTagLookup($array) {
ksort($array);
$list = array();
foreach ($array as $name => $discard) {
* @param $array List of objects
* @todo Also add information about internal state
*/
- function listifyObjectList($array) {
+ protected function listifyObjectList($array) {
ksort($array);
$list = array();
foreach ($array as $discard => $obj) {
* Listifies a hash of attributes to AttrDef classes
* @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
*/
- function listifyAttr($array) {
+ protected function listifyAttr($array) {
ksort($array);
$list = array();
foreach ($array as $name => $obj) {
/**
* Creates a heavy header row
*/
- function heavyHeader($text, $num = 1) {
+ protected function heavyHeader($text, $num = 1) {
$ret = '';
$ret .= $this->start('tr');
$ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy'));
* features, such as custom tags, custom parsing of text, etc.
*/
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'EscapeInvalidTags', false, 'bool',
- 'When true, invalid tags will be written back to the document as plain '.
- 'text. Otherwise, they are silently dropped.'
-);
-class HTMLPurifier_Strategy
+abstract class HTMLPurifier_Strategy
{
/**
* @param $config Configuration options
* @returns Processed array of token objects.
*/
- function execute($tokens, $config, &$context) {
- trigger_error('Cannot call abstract function', E_USER_ERROR);
- }
+ abstract public function execute($tokens, $config, $context);
}
<?php
-require_once 'HTMLPurifier/Strategy.php';
-require_once 'HTMLPurifier/Config.php';
-
/**
* Composite strategy that runs multiple strategies on tokens.
*/
-class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
+abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
{
/**
* List of strategies to run tokens through.
*/
- var $strategies = array();
+ protected $strategies = array();
- function HTMLPurifier_Strategy_Composite() {
- trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
- }
+ abstract public function __construct();
- function execute($tokens, $config, &$context) {
+ public function execute($tokens, $config, $context) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context);
}
<?php
-require_once 'HTMLPurifier/Strategy/Composite.php';
-
-require_once 'HTMLPurifier/Strategy/RemoveForeignElements.php';
-require_once 'HTMLPurifier/Strategy/MakeWellFormed.php';
-require_once 'HTMLPurifier/Strategy/FixNesting.php';
-require_once 'HTMLPurifier/Strategy/ValidateAttributes.php';
-
/**
* Core strategy composed of the big four strategies.
*/
class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
{
- function HTMLPurifier_Strategy_Core() {
+ public function __construct() {
$this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements();
$this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed();
$this->strategies[] = new HTMLPurifier_Strategy_FixNesting();
<?php
-require_once 'HTMLPurifier/Strategy.php';
-require_once 'HTMLPurifier/HTMLDefinition.php';
-
/**
* Takes a well formed list of tokens and fixes their nesting.
*
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{
- function execute($tokens, $config, &$context) {
+ public function execute($tokens, $config, $context) {
//####################################################################//
// Pre-processing
// scroll to the end of this node, report number, and collect
// all children
for ($j = $i, $depth = 0; ; $j++) {
- if ($tokens[$j]->type == 'start') {
+ if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
$depth++;
// skip token assignment on first iteration, this is the
// token we currently are on
if ($depth == 1) continue;
- } elseif ($tokens[$j]->type == 'end') {
+ } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
$depth--;
// skip token assignment on last iteration, this is the
// end token of the token we're currently on
// Test if the token indeed is a start tag, if not, move forward
// and test again.
$size = count($tokens);
- while ($i < $size and $tokens[$i]->type != 'start') {
- if ($tokens[$i]->type == 'end') {
+ while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
+ if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
// pop a token index off the stack if we ended a node
array_pop($stack);
// pop an exclusion lookup off exclusion stack if
<?php
-require_once 'HTMLPurifier/Strategy.php';
-require_once 'HTMLPurifier/HTMLDefinition.php';
-require_once 'HTMLPurifier/Generator.php';
-
-require_once 'HTMLPurifier/Injector/AutoParagraph.php';
-require_once 'HTMLPurifier/Injector/Linkify.php';
-require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'AutoFormat', 'Custom', array(), 'list', '
-<p>
- This directive can be used to add custom auto-format injectors.
- Specify an array of injector names (class name minus the prefix)
- or concrete implementations. Injector class must exist. This directive
- has been available since 2.0.1.
-</p>
-'
-);
-
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
*/
/**
* Locally shared variable references
- * @private
*/
- var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
+ protected $inputTokens, $inputIndex, $outputTokens, $currentNesting,
$currentInjector, $injectors;
- function execute($tokens, $config, &$context) {
+ public function execute($tokens, $config, $context) {
$definition = $config->getHTMLDefinition();
// local variables
$result = array();
- $generator = new HTMLPurifier_Generator();
+ $generator = new HTMLPurifier_Generator($config, $context);
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
- $e =& $context->get('ErrorCollector', true);
+ $e = $context->get('ErrorCollector', true);
// member variables
$this->currentNesting = array();
// context variables
$context->register('CurrentNesting', $this->currentNesting);
- $context->register('InputIndex', $this->inputIndex);
- $context->register('InputTokens', $tokens);
+ $context->register('InputIndex', $this->inputIndex);
+ $context->register('InputTokens', $tokens);
// -- begin INJECTOR --
// give the injectors references to the definition and context
// variables for performance reasons
- foreach ($this->injectors as $i => $x) {
- $error = $this->injectors[$i]->prepare($config, $context);
+ foreach ($this->injectors as $i => $injector) {
+ $error = $injector->prepare($config, $context);
if (!$error) continue;
- list($injector) = array_splice($this->injectors, $i, 1);
- $name = $injector->name;
- trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
+ array_splice($this->injectors, $i, 1); // rm the injector
+ trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
}
- // warning: most foreach loops follow the convention $i => $x.
- // be sure, for PHP4 compatibility, to only perform write operations
- // directly referencing the object using $i: $x is only safe for reads
+ // warning: most foreach loops follow the convention $i => $injector.
+ // Don't define these as loop-wide variables, please!
// -- end INJECTOR --
$token = false;
$context->register('CurrentToken', $token);
+ // isset is in loop because $tokens size changes during loop exec
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
// if all goes well, this token will be passed through unharmed
//printTokens($tokens, $this->inputIndex);
- foreach ($this->injectors as $i => $x) {
- if ($x->skip > 0) $this->injectors[$i]->skip--;
+ foreach ($this->injectors as $injector) {
+ if ($injector->skip > 0) $injector->skip--;
}
// quick-check: if it's not a tag, no need to process
if (empty( $token->is_tag )) {
- if ($token->type === 'text') {
+ if ($token instanceof HTMLPurifier_Token_Text) {
// injector handler code; duplicated for performance reasons
- foreach ($this->injectors as $i => $x) {
- if (!$x->skip) $this->injectors[$i]->handleText($token);
+ foreach ($this->injectors as $i => $injector) {
+ if (!$injector->skip) $injector->handleText($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
// quick tag checks: anything that's *not* an end tag
$ok = false;
- if ($info->type == 'empty' && $token->type == 'start') {
+ if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// test if it claims to be a start tag but is empty
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
$ok = true;
- } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
+ } elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$token = array(
new HTMLPurifier_Token_Start($token->name, $token->attr),
new HTMLPurifier_Token_End($token->name)
);
$ok = true;
- } elseif ($token->type == 'empty') {
+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
// real empty token
$ok = true;
- } elseif ($token->type == 'start') {
+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
// start tag
// ...unless they also have to close their parent
// injector handler code; duplicated for performance reasons
if ($ok) {
- foreach ($this->injectors as $i => $x) {
- if (!$x->skip) $this->injectors[$i]->handleElement($token);
+ foreach ($this->injectors as $i => $injector) {
+ if (!$injector->skip) $injector->handleElement($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
}
// sanity check: we should be dealing with a closing tag
- if ($token->type != 'end') continue;
+ if (!$token instanceof HTMLPurifier_Token_End) continue;
// make sure that we have something open
if (empty($this->currentNesting)) {
$current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$result[] = $token;
- foreach ($this->injectors as $i => $x) {
- $this->injectors[$i]->notifyEnd($token);
+ foreach ($this->injectors as $i => $injector) {
+ $injector->notifyEnd($token);
}
continue;
}
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
+ // please don't redefine $i!
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
}
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
- foreach ($this->injectors as $j => $x) { // $j, not $i!!!
- $this->injectors[$j]->notifyEnd($new_token);
+ foreach ($this->injectors as $injector) {
+ $injector->notifyEnd($new_token);
}
}
// not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) {
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
+ // please don't redefine $i!
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
- foreach ($this->injectors as $j => $x) { // $j, not $i!!!
- $this->injectors[$j]->notifyEnd($new_token);
+ foreach ($this->injectors as $injector) {
+ $injector->notifyEnd($new_token);
}
}
}
return $result;
}
- function processToken($token, $config, &$context) {
+ function processToken($token, $config, $context) {
if (is_array($token)) {
// the original token was overloaded by an injector, time
// to some fancy acrobatics
} elseif ($token) {
// regular case
$this->outputTokens[] = $token;
- if ($token->type == 'start') {
+ if ($token instanceof HTMLPurifier_Token_Start) {
$this->currentNesting[] = $token;
- } elseif ($token->type == 'end') {
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
array_pop($this->currentNesting); // not actually used
}
}
<?php
-require_once 'HTMLPurifier/Strategy.php';
-require_once 'HTMLPurifier/HTMLDefinition.php';
-require_once 'HTMLPurifier/Generator.php';
-require_once 'HTMLPurifier/TagTransform.php';
-
-require_once 'HTMLPurifier/AttrValidator.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'RemoveInvalidImg', true, 'bool', '
-<p>
- This directive enables pre-emptive URI checking in <code>img</code>
- tags, as the attribute validation strategy is not authorized to
- remove elements from the document. This directive has been available
- since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
-</p>
-'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'RemoveScriptContents', null, 'bool/null', '
-<p>
- This directive enables HTML Purifier to remove not only script tags
- but all of their contents. This directive has been deprecated since 2.1.0,
- and when not set the value of %Core.HiddenElements will take
- precedence. This directive has been available since 2.0.0, and can be used to
- revert to pre-2.0.0 behavior by setting it to false.
-</p>
-'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
-<p>
- This directive is a lookup array of elements which should have their
- contents removed when they are not allowed by the HTML definition.
- For example, the contents of a <code>script</code> tag are not
- normally shown in a document, so if script tags are to be removed,
- their contents should be removed to. This is opposed to a <code>b</code>
- tag, which defines some presentational changes but does not hide its
- contents.
-</p>
-'
-);
-
/**
* Removes all unrecognized tags from the list of tokens.
*
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
- function execute($tokens, $config, &$context) {
+ public function execute($tokens, $config, $context) {
$definition = $config->getHTMLDefinition();
- $generator = new HTMLPurifier_Generator();
+ $generator = new HTMLPurifier_Generator($config, $context);
$result = array();
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
// mostly everything's good, but
// we need to make sure required attributes are in order
if (
- ($token->type === 'start' || $token->type === 'empty') &&
+ ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
$token->armor['ValidateAttributes'] = true;
}
- if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
+ if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
$textify_comments = $token->name;
- } elseif ($token->name === $textify_comments && $token->type == 'end') {
+ } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
$textify_comments = false;
}
// check if we need to destroy all of the tag's children
// CAN BE GENERICIZED
if (isset($hidden_elements[$token->name])) {
- if ($token->type == 'start') {
+ if ($token instanceof HTMLPurifier_Token_Start) {
$remove_until = $token->name;
- } elseif ($token->type == 'empty') {
+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
// do nothing: we're still looking
} else {
$remove_until = false;
}
continue;
}
- } elseif ($token->type == 'comment') {
+ } elseif ($token instanceof HTMLPurifier_Token_Comment) {
// textify comments in script tags when they are allowed
if ($textify_comments !== false) {
$data = $token->data;
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
continue;
}
- } elseif ($token->type == 'text') {
+ } elseif ($token instanceof HTMLPurifier_Token_Text) {
} else {
continue;
}
<?php
-require_once 'HTMLPurifier/Strategy.php';
-require_once 'HTMLPurifier/HTMLDefinition.php';
-require_once 'HTMLPurifier/IDAccumulator.php';
-
-require_once 'HTMLPurifier/AttrValidator.php';
-
/**
* Validate all attributes in the tokens.
*/
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{
- function execute($tokens, $config, &$context) {
+ public function execute($tokens, $config, $context) {
// setup validator
$validator = new HTMLPurifier_AttrValidator();
// only process tokens that have attributes,
// namely start and empty tags
- if ($token->type !== 'start' && $token->type !== 'empty') continue;
+ if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue;
// skip tokens that are armored
if (!empty($token->armor['ValidateAttributes'])) continue;
--- /dev/null
+<?php
+
+/**
+ * This is in almost every respect equivalent to an array except
+ * that it keeps track of which keys were accessed.
+ *
+ * @warning For the sake of backwards compatibility with early versions
+ * of PHP 5, you must not use the $hash[$key] syntax; if you do
+ * our version of offsetGet is never called.
+ */
+class HTMLPurifier_StringHash extends ArrayObject
+{
+    /**
+     * Lookup of every index read through offsetGet() since construction
+     * or the last resetAccessed() call, in the form array($index => true).
+     */
+    protected $accessed = array();
+
+    /**
+     * Fetches the value stored at an index and records that it was read.
+     * The access is logged before the lookup is delegated to ArrayObject.
+     * @param $index Key to look up.
+     * @return Whatever ArrayObject::offsetGet() returns for that key.
+     */
+    public function offsetGet($index) {
+        $this->accessed[$index] = true;
+        $value = parent::offsetGet($index);
+        return $value;
+    }
+
+    /**
+     * Reports which indexes have been read so far.
+     * @return Array in form array($index => true).
+     */
+    public function getAccessed() {
+        $log = $this->accessed;
+        return $log;
+    }
+
+    /**
+     * Forgets all recorded accesses; stored values are left untouched.
+     */
+    public function resetAccessed() {
+        $this->accessed = array();
+    }
+}
--- /dev/null
+<?php
+
+/**
+ * Parses string hash files. File format is as such:
+ *
+ * DefaultKeyValue
+ * KEY: Value
+ * KEY2: Value2
+ * --MULTILINE-KEY--
+ * Multiline
+ * value.
+ *
+ * Which would output something similar to:
+ *
+ * array(
+ * 'ID' => 'DefaultKeyValue',
+ * 'KEY' => 'Value',
+ * 'KEY2' => 'Value2',
+ * 'MULTILINE-KEY' => "Multiline\nvalue.\n",
+ * )
+ *
+ * We use this as an easy to use file-format for configuration schema
+ * files, but the class itself is usage agnostic.
+ *
+ * You can use ---- to forcibly terminate parsing of a single string-hash;
+ * this marker is used in multi string-hashes to delimit boundaries.
+ */
+class HTMLPurifier_StringHashParser
+{
+
+    /**
+     * Key under which a single line without a "KEY:" prefix is stored.
+     */
+    public $default = 'ID';
+
+    /**
+     * Parses a file that contains a single string-hash.
+     * @param $file Path of the file to parse.
+     * @return Array of key => value strings, or false if the file does
+     *         not exist or cannot be opened.
+     */
+    public function parseFile($file) {
+        if (!file_exists($file)) return false;
+        $fh = fopen($file, 'r');
+        if (!$fh) return false;
+        $ret = $this->parseHandle($fh);
+        fclose($fh);
+        return $ret;
+    }
+
+    /**
+     * Parses a file that contains multiple string-hashes delimited by '----'
+     * @param $file Path of the file to parse.
+     * @return List of string-hash arrays in file order, or false if the
+     *         file does not exist or cannot be opened.
+     */
+    public function parseMultiFile($file) {
+        if (!file_exists($file)) return false;
+        $ret = array();
+        $fh = fopen($file, 'r');
+        if (!$fh) return false;
+        // each call consumes one hash, stopping at '----' or end of file
+        while (!feof($fh)) {
+            $ret[] = $this->parseHandle($fh);
+        }
+        fclose($fh);
+        return $ret;
+    }
+
+    /**
+     * Internal parser that accepts a file handle.
+     * @note While it's possible to simulate in-memory parsing by using
+     *       custom stream wrappers, if such a use-case arises we should
+     *       factor out the file handle into its own class.
+     * @param $fh File handle with pointer at start of valid string-hash
+     *        block.
+     * @return Array of key => value strings read up to the next '----'
+     *         line or the end of the stream.
+     */
+    protected function parseHandle($fh) {
+        $state   = false; // key currently being filled, false when none
+        $single  = false; // true while handling a one-line declaration
+        $ret     = array();
+        do {
+            $line = fgets($fh);
+            if ($line === false) break;
+            $line = rtrim($line, "\n\r");
+            // blank lines only matter inside a multiline value
+            if (!$state && $line === '') continue;
+            if ($line === '----') break;
+            if (strncmp('--', $line, 2) === 0) {
+                // Multiline declaration
+                $state = trim($line, '- ');
+                continue;
+            } elseif (!$state) {
+                $single = true;
+                if (strpos($line, ':') !== false) {
+                    // Single-line declaration. Prefer the documented
+                    // "KEY: Value" separator; when the colon is not
+                    // followed by a space ("KEY:Value" or "KEY:") fall
+                    // back to the bare colon so list() always receives
+                    // two elements.
+                    $parts = explode(': ', $line, 2);
+                    if (count($parts) < 2) {
+                        $parts = explode(':', $line, 2);
+                    }
+                    list($state, $line) = $parts;
+                } else {
+                    // Use default declaration
+                    $state = $this->default;
+                }
+            }
+            if ($single) {
+                $ret[$state] = $line;
+                $single = false;
+                $state  = false;
+            } else {
+                // multiline values keep a trailing newline on every line
+                if (!isset($ret[$state])) $ret[$state] = '';
+                $ret[$state] .= "$line\n";
+            }
+        } while (!feof($fh));
+        return $ret;
+    }
+
+}
<?php
-require_once 'HTMLPurifier/Token.php';
-
/**
* Defines a mutation of an obsolete tag into a valid tag.
*/
-class HTMLPurifier_TagTransform
+abstract class HTMLPurifier_TagTransform
{
/**
* Tag name to transform the tag to.
- * @public
*/
- var $transform_to;
+ public $transform_to;
/**
* Transforms the obsolete tag into the valid tag.
* @param $config Mandatory HTMLPurifier_Config object
* @param $context Mandatory HTMLPurifier_Context object
*/
- function transform($tag, $config, &$context) {
- trigger_error('Call to abstract function', E_USER_ERROR);
- }
+ abstract public function transform($tag, $config, $context);
/**
* Prepends CSS properties to the style attribute, creating the
* @param $attr Attribute array to process (passed by reference)
* @param $css CSS to prepend
*/
- function prependCSS(&$attr, $css) {
+ protected function prependCSS(&$attr, $css) {
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
$attr['style'] = $css . $attr['style'];
}
<?php
-require_once 'HTMLPurifier/TagTransform.php';
-
/**
* Transforms FONT tags to the proper form (SPAN with CSS styling)
*
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{
- var $transform_to = 'span';
+ public $transform_to = 'span';
- var $_size_lookup = array(
+ protected $_size_lookup = array(
'0' => 'xx-small',
'1' => 'xx-small',
'2' => 'small',
'+4' => '300%'
);
- function transform($tag, $config, &$context) {
+ public function transform($tag, $config, $context) {
- if ($tag->type == 'end') {
- $new_tag = $tag->copy();
+ if ($tag instanceof HTMLPurifier_Token_End) {
+ $new_tag = clone $tag;
$new_tag->name = $this->transform_to;
return $new_tag;
}
$prepend_style;
}
- $new_tag = $tag->copy();
+ $new_tag = clone $tag;
$new_tag->name = $this->transform_to;
$new_tag->attr = $attr;
<?php
-require_once 'HTMLPurifier/TagTransform.php';
-
/**
* Simple transformation, just change tag name to something else,
* and possibly add some styling. This will cover most of the deprecated
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{
- var $style;
+ protected $style;
/**
* @param $transform_to Tag name to transform to.
* @param $style CSS style to add to the tag
*/
- function HTMLPurifier_TagTransform_Simple($transform_to, $style = null) {
+ public function __construct($transform_to, $style = null) {
$this->transform_to = $transform_to;
$this->style = $style;
}
- function transform($tag, $config, &$context) {
- $new_tag = $tag->copy();
+ public function transform($tag, $config, $context) {
+ $new_tag = clone $tag;
$new_tag->name = $this->transform_to;
if (!is_null($this->style) &&
- ($new_tag->type == 'start' || $new_tag->type == 'empty')
+ ($new_tag instanceof HTMLPurifier_Token_Start || $new_tag instanceof HTMLPurifier_Token_Empty)
) {
$this->prependCSS($new_tag->attr, $this->style);
}
<?php
-/**
- * Defines a set of immutable value object tokens for HTML representation.
- *
- * @file
- */
-
/**
* Abstract base token class that all others inherit from.
*/
class HTMLPurifier_Token {
- var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
- var $line; /**< Line number node was on in source document. Null if unknown. @public */
+ public $type; /**< Type of node to bypass <tt>is_a()</tt>. */
+ public $line; /**< Line number node was on in source document. Null if unknown. */
/**
* Lookup array of processing that this token is exempt from.
* Currently, valid values are "ValidateAttributes" and
* "MakeWellFormed_TagClosedError"
*/
- var $armor = array();
-
- /**
- * Copies the tag into a new one (clone substitute).
- * @return Copied token
- */
- function copy() {
- return unserialize(serialize($this));
- }
-}
-
-/**
- * Abstract class of a tag token (start, end or empty), and its behavior.
- */
-class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
-{
- /**
- * Static bool marker that indicates the class is a tag.
- *
- * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
- * without having to use a function call <tt>is_a()</tt>.
- *
- * @public
- */
- var $is_tag = true;
-
- /**
- * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
- *
- * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
- * be lower-casing them, but these tokens cater to HTML tags, which are
- * insensitive.
- *
- * @public
- */
- var $name;
-
- /**
- * Associative array of the tag's attributes.
- */
- var $attr = array();
-
- /**
- * Non-overloaded constructor, which lower-cases passed tag name.
- *
- * @param $name String name.
- * @param $attr Associative array of attributes.
- */
- function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) {
- $this->name = ctype_lower($name) ? $name : strtolower($name);
- foreach ($attr as $key => $value) {
- // normalization only necessary when key is not lowercase
- if (!ctype_lower($key)) {
- $new_key = strtolower($key);
- if (!isset($attr[$new_key])) {
- $attr[$new_key] = $attr[$key];
- }
- if ($new_key !== $key) {
- unset($attr[$key]);
- }
- }
+ public $armor = array();
+
+ public function __get($n) {
+ if ($n === 'type') {
+ trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
+ switch (get_class($this)) {
+ case 'HTMLPurifier_Token_Start': return 'start';
+ case 'HTMLPurifier_Token_Empty': return 'empty';
+ case 'HTMLPurifier_Token_End': return 'end';
+ case 'HTMLPurifier_Token_Text': return 'text';
+ case 'HTMLPurifier_Token_Comment': return 'comment';
+ default: return null;
}
- $this->attr = $attr;
- $this->line = $line;
- }
-}
-
-/**
- * Concrete start token class.
- */
-class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
-{
- var $type = 'start';
-}
-
-/**
- * Concrete empty token class.
- */
-class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
-{
- var $type = 'empty';
-}
-
-/**
- * Concrete end token class.
- *
- * @warning This class accepts attributes even though end tags cannot. This
- * is for optimization reasons, as under normal circumstances, the Lexers
- * do not pass attributes.
- */
-class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
-{
- var $type = 'end';
-}
-
-/**
- * Concrete text token class.
- *
- * Text tokens comprise of regular parsed character data (PCDATA) and raw
- * character data (from the CDATA sections). Internally, their
- * data is parsed with all entities expanded. Surprisingly, the text token
- * does have a "tag name" called #PCDATA, which is how the DTD represents it
- * in permissible child nodes.
- */
-class HTMLPurifier_Token_Text extends HTMLPurifier_Token
-{
-
- var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
- var $type = 'text';
- var $data; /**< Parsed character data of text. @public */
- var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
-
- /**
- * Constructor, accepts data and determines if it is whitespace.
- *
- * @param $data String parsed character data.
- */
- function HTMLPurifier_Token_Text($data, $line = null) {
- $this->data = $data;
- $this->is_whitespace = ctype_space($data);
- $this->line = $line;
+ }
}
-
}
-
-/**
- * Concrete comment token class. Generally will be ignored.
- */
-class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
-{
- var $data; /**< Character data within comment. @public */
- var $type = 'comment';
- /**
- * Transparent constructor.
- *
- * @param $data String comment data.
- */
- function HTMLPurifier_Token_Comment($data, $line = null) {
- $this->data = $data;
- $this->line = $line;
- }
-}
-
--- /dev/null
+<?php
+
+/**
+ * Concrete comment token class. Generally will be ignored.
+ */
+class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
+{
+    /**
+     * Character data within comment.
+     */
+    public $data;
+
+    /**
+     * Transparent constructor: both arguments are stored as given.
+     *
+     * @param $data String comment data.
+     * @param $line Value stored in the inherited $line property;
+     *        defaults to null.
+     */
+    public function __construct($data, $line = null) {
+        $this->line = $line;
+        $this->data = $data;
+    }
+}
+
--- /dev/null
+<?php
+
+/**
+ * Concrete empty token class.
+ */
+class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
+{
+    // Declares no members of its own; everything comes from
+    // HTMLPurifier_Token_Tag. The distinct class lets code identify
+    // empty tags with instanceof.
+
+}
--- /dev/null
+<?php
+
+/**
+ * Concrete end token class.
+ *
+ * @warning This class accepts attributes even though end tags cannot. This
+ * is for optimization reasons, as under normal circumstances, the Lexers
+ * do not pass attributes.
+ */
+class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
+{
+    // Declares no members of its own; everything, including the
+    // attribute-accepting constructor, comes from HTMLPurifier_Token_Tag.
+    // The distinct class lets code identify end tags with instanceof.
+
+}
--- /dev/null
+<?php
+
+/**
+ * Concrete start token class.
+ */
+class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
+{
+    // Declares no members of its own; everything comes from
+    // HTMLPurifier_Token_Tag. The distinct class lets code identify
+    // start tags with instanceof.
+
+}
--- /dev/null
+<?php
+
+/**
+ * Abstract class of a tag token (start, end or empty), and its behavior.
+ */
+class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
+{
+    /**
+     * Bool marker that indicates the object is a tag token.
+     *
+     * Lets callers test <tt>!empty($obj->is_tag)</tt> instead of making
+     * an <tt>is_a()</tt> function call.
+     */
+    public $is_tag = true;
+
+    /**
+     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
+     *
+     * @note XML tag names are case sensitive, so strictly speaking they
+     *       should not be lower-cased; these tokens cater to HTML tags,
+     *       which are case insensitive.
+     */
+    public $name;
+
+    /**
+     * Associative array of the tag's attributes.
+     */
+    public $attr = array();
+
+    /**
+     * Non-overloaded constructor; lower-cases the tag name and the
+     * attribute names it is given.
+     *
+     * @param $name String name.
+     * @param $attr Associative array of attributes.
+     * @param $line Value stored in the inherited $line property;
+     *        defaults to null.
+     */
+    public function __construct($name, $attr = array(), $line = null) {
+        if (!ctype_lower($name)) {
+            $name = strtolower($name);
+        }
+        $this->name = $name;
+
+        foreach ($attr as $key => $ignored) {
+            // keys made up solely of lower-case letters need no work
+            if (ctype_lower($key)) continue;
+            $lowered = strtolower($key);
+            // an entry already stored under the lower-case key is kept
+            if (!isset($attr[$lowered])) {
+                $attr[$lowered] = $attr[$key];
+            }
+            // remove the original spelling when lower-casing changed it
+            if ($lowered !== $key) {
+                unset($attr[$key]);
+            }
+        }
+
+        $this->line = $line;
+        $this->attr = $attr;
+    }
+}
--- /dev/null
+<?php
+
+/**
+ * Concrete text token class.
+ *
+ * Text tokens consist of regular parsed character data (PCDATA) and raw
+ * character data (from the CDATA sections). Internally, their
+ * data is parsed with all entities expanded. Surprisingly, the text token
+ * does have a "tag name" called #PCDATA, which is how the DTD represents it
+ * in permissible child nodes.
+ */
+class HTMLPurifier_Token_Text extends HTMLPurifier_Token
+{
+
+    /** PCDATA tag name compatible with DTD. */
+    public $name = '#PCDATA';
+
+    /** Parsed character data of text. */
+    public $data;
+
+    /** Bool indicating if node is whitespace, as judged by ctype_space(). */
+    public $is_whitespace;
+
+    /**
+     * Constructor, accepts data and determines if it is whitespace.
+     *
+     * @param $data String parsed character data.
+     * @param $line Value stored in the inherited $line property;
+     *        defaults to null.
+     */
+    public function __construct($data, $line = null) {
+        $this->line = $line;
+        $this->is_whitespace = ctype_space($data);
+        $this->data = $data;
+    }
+
+}
<?php
-require_once 'HTMLPurifier/Token.php';
-
/**
- * Factory for token generation (PHP 5 only).
+ * Factory for token generation.
*
* @note Doing some benchmarking indicates that the new operator is much
* slower than the clone operator (even discounting the cost of the
- * constructor). This class is for that optimization. We may want to
- * consider porting this to PHP 4 by virtue of the fact it makes the code
- * easier to read. Other then that, there's not much point as we don't
+ * constructor). This class is for that optimization.
+ * Other than that, there's not much point as we don't
* maintain parallel HTMLPurifier_Token hierarchies (the main reason why
* you'd want to use an abstract factory).
+ * @todo Port DirectLex to use this
*/
class HTMLPurifier_TokenFactory
{
*/
public function createStart($name, $attr = array()) {
$p = clone $this->p_start;
- $p->HTMLPurifier_Token_Tag($name, $attr);
+ $p->__construct($name, $attr);
return $p;
}
*/
public function createEnd($name) {
$p = clone $this->p_end;
- $p->HTMLPurifier_Token_Tag($name);
+ $p->__construct($name);
return $p;
}
*/
public function createEmpty($name, $attr = array()) {
$p = clone $this->p_empty;
- $p->HTMLPurifier_Token_Tag($name, $attr);
+ $p->__construct($name, $attr);
return $p;
}
*/
public function createText($data) {
$p = clone $this->p_text;
- $p->HTMLPurifier_Token_Text($data);
+ $p->__construct($data);
return $p;
}
*/
public function createComment($data) {
$p = clone $this->p_comment;
- $p->HTMLPurifier_Token_Comment($data);
+ $p->__construct($data);
return $p;
}
<?php
-require_once 'HTMLPurifier/URIParser.php';
-require_once 'HTMLPurifier/URIFilter.php';
-
/**
* HTML Purifier's internal representation of a URI.
* @note
class HTMLPurifier_URI
{
- var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
+ public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
/**
* @note Automatically normalizes scheme and port
*/
- function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
+ public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
$this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
$this->userinfo = $userinfo;
$this->host = $host;
* @param $context Instance of HTMLPurifier_Context
* @return Scheme object appropriate for validating this URI
*/
- function getSchemeObj($config, &$context) {
- $registry =& HTMLPurifier_URISchemeRegistry::instance();
+ public function getSchemeObj($config, $context) {
+ $registry = HTMLPurifier_URISchemeRegistry::instance();
if ($this->scheme !== null) {
$scheme_obj = $registry->getScheme($this->scheme, $config, $context);
if (!$scheme_obj) return false; // invalid scheme, clean it out
* @param $context Instance of HTMLPurifier_Context
* @return True if validation/filtering succeeds, false if failure
*/
- function validate($config, &$context) {
+ public function validate($config, $context) {
// ABNF definitions from RFC 3986
$chars_sub_delims = '!$&\'()*+,;=';
* Convert URI back to string
* @return String URI appropriate for output
*/
- function toString() {
+ public function toString() {
// reconstruct authority
$authority = null;
if (!is_null($this->host)) {
return $result;
}
- /**
- * Returns a copy of the URI object
- */
- function copy() {
- return unserialize(serialize($this));
- }
-
}
<?php
-require_once 'HTMLPurifier/Definition.php';
-require_once 'HTMLPurifier/URIFilter.php';
-require_once 'HTMLPurifier/URIParser.php';
-
-require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
-require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
-require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
-require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DefinitionID', null, 'string/null', '
-<p>
- Unique identifier for a custom-built URI definition. If you want
- to add custom URIFilters, you must specify this value.
- This directive has been available since 2.1.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DefinitionRev', 1, 'int', '
-<p>
- Revision identifier for your custom definition. See
- %HTML.DefinitionRev for details. This directive has been available
- since 2.1.0.
-</p>
-');
-
-// informative URI directives
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DefaultScheme', 'http', 'string', '
-<p>
- Defines through what scheme the output will be served, in order to
- select the proper object validator when no scheme information is present.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Host', null, 'string/null', '
-<p>
- Defines the domain name of the server, so we can determine whether or
- an absolute URI is from your website or not. Not strictly necessary,
- as users should be using relative URIs to reference resources on your
- website. It will, however, let you use absolute URIs to link to
- subdomains of the domain you post here: i.e. example.com will allow
- sub.example.com. However, higher up domains will still be excluded:
- if you set %URI.Host to sub.example.com, example.com will be blocked.
- <strong>Note:</strong> This directive overrides %URI.Base because
- a given page may be on a sub-domain, but you wish HTML Purifier to be
- more relaxed and allow some of the parent domains too.
- This directive has been available since 1.2.0.
-</p>
-');
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Base', null, 'string/null', '
-<p>
- The base URI is the URI of the document this purified HTML will be
- inserted into. This information is important if HTML Purifier needs
- to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
- is on. You may use a non-absolute URI for this value, but behavior
- may vary (%URI.MakeAbsolute deals nicely with both absolute and
- relative paths, but forwards-compatibility is not guaranteed).
- <strong>Warning:</strong> If set, the scheme on this URI
- overrides the one specified by %URI.DefaultScheme. This directive has
- been available since 2.1.0.
-</p>
-');
-
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{
- var $type = 'URI';
- var $filters = array();
- var $registeredFilters = array();
+ public $type = 'URI';
+ protected $filters = array();
+ protected $registeredFilters = array();
/**
* HTMLPurifier_URI object of the base specified at %URI.Base
*/
- var $base;
+ public $base;
/**
- * String host to consider "home" base
+ * String host to consider "home" base, derived off of $base
*/
- var $host;
+ public $host;
/**
* Name of default scheme based on %URI.DefaultScheme and %URI.Base
*/
- var $defaultScheme;
+ public $defaultScheme;
- function HTMLPurifier_URIDefinition() {
+ public function __construct() {
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
}
- function registerFilter($filter) {
+ public function registerFilter($filter) {
$this->registeredFilters[$filter->name] = $filter;
}
- function addFilter($filter, $config) {
+ public function addFilter($filter, $config) {
$filter->prepare($config);
$this->filters[$filter->name] = $filter;
}
- function doSetup($config) {
+ protected function doSetup($config) {
$this->setupMemberVariables($config);
$this->setupFilters($config);
}
- function setupFilters($config) {
+ protected function setupFilters($config) {
foreach ($this->registeredFilters as $name => $filter) {
$conf = $config->get('URI', $name);
if ($conf !== false && $conf !== null) {
unset($this->registeredFilters);
}
- function setupMemberVariables($config) {
+ protected function setupMemberVariables($config) {
$this->host = $config->get('URI', 'Host');
$base_uri = $config->get('URI', 'Base');
if (!is_null($base_uri)) {
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme');
}
- function filter(&$uri, $config, &$context) {
+ public function filter(&$uri, $config, $context) {
foreach ($this->filters as $name => $x) {
$result = $this->filters[$name]->filter($uri, $config, $context);
if (!$result) return false;
* you check that it exists. This allows filters to convert
* proprietary URI schemes into regular ones.
*/
-class HTMLPurifier_URIFilter
+abstract class HTMLPurifier_URIFilter
{
/**
* Unique identifier of filter
*/
- var $name;
+ public $name;
/**
* Performs initialization for the filter
*/
- function prepare($config) {}
+ public function prepare($config) {}
/**
* Filter a URI object
- * @param &$uri Reference to URI object
+ * @param $uri Reference to URI object variable
* @param $config Instance of HTMLPurifier_Config
- * @param &$context Instance of HTMLPurifier_Context
+ * @param $context Instance of HTMLPurifier_Context
* @return bool Whether or not to continue processing: false indicates
* URL is no good, true indicates continue processing. Note that
* all changes are committed directly on the URI object
*/
- function filter(&$uri, $config, &$context) {
- trigger_error('Cannot call abstract function', E_USER_ERROR);
- }
+ abstract public function filter(&$uri, $config, $context);
}
<?php
-require_once 'HTMLPurifier/URIFilter.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableExternal', false, 'bool',
- 'Disables links to external websites. This is a highly effective '.
- 'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
- 'links or images outside of your domain will be allowed. Non-linkified '.
- 'URIs will still be preserved. If you want to be able to link to '.
- 'subdomains or use absolute URIs, specify %URI.Host for your website. '.
- 'This directive has been available since 1.2.0.'
-);
-
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
- var $name = 'DisableExternal';
- var $ourHostParts = false;
- function prepare($config) {
+ public $name = 'DisableExternal';
+ protected $ourHostParts = false;
+ public function prepare($config) {
$our_host = $config->get('URI', 'Host');
if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
}
- function filter(&$uri, $config, &$context) {
+ public function filter(&$uri, $config, $context) {
if (is_null($uri->host)) return true;
if ($this->ourHostParts === false) return false;
$host_parts = array_reverse(explode('.', $uri->host));
<?php
-require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableExternalResources', false, 'bool',
- 'Disables the embedding of external resources, preventing users from '.
- 'embedding things like images from other hosts. This prevents '.
- 'access tracking (good for email viewers), bandwidth leeching, '.
- 'cross-site request forging, goatse.cx posting, and '.
- 'other nasties, but also results in '.
- 'a loss of end-user functionality (they can\'t directly post a pic '.
- 'they posted from Flickr anymore). Use it if you don\'t have a '.
- 'robust user-content moderation team. This directive has been '.
- 'available since 1.3.0.'
-);
-
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
- var $name = 'DisableExternalResources';
- function filter(&$uri, $config, &$context) {
+ public $name = 'DisableExternalResources';
+ public function filter(&$uri, $config, $context) {
if (!$context->get('EmbeddedURI', true)) return true;
return parent::filter($uri, $config, $context);
}
<?php
-require_once 'HTMLPurifier/URIFilter.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'HostBlacklist', array(), 'list',
- 'List of strings that are forbidden in the host of any URI. Use it to '.
- 'kill domain names of spam, etc. Note that it will catch anything in '.
- 'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
- 'This directive has been available since 1.3.0.'
-);
-
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
- var $name = 'HostBlacklist';
- var $blacklist = array();
- function prepare($config) {
+ public $name = 'HostBlacklist';
+ protected $blacklist = array();
+ public function prepare($config) {
$this->blacklist = $config->get('URI', 'HostBlacklist');
}
- function filter(&$uri, $config, &$context) {
+ public function filter(&$uri, $config, $context) {
foreach($this->blacklist as $blacklisted_host_fragment) {
if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
return false;
// does not support network paths
-require_once 'HTMLPurifier/URIFilter.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'MakeAbsolute', false, 'bool', '
-<p>
- Converts all URIs into absolute forms. This is useful when the HTML
- being filtered assumes a specific base path, but will actually be
- viewed in a different context (and setting an alternate base URI is
- not possible). %URI.Base must be set for this directive to work.
- This directive has been available since 2.1.0.
-</p>
-');
-
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
- var $name = 'MakeAbsolute';
- var $base;
- var $basePathStack = array();
- function prepare($config) {
+ public $name = 'MakeAbsolute';
+ protected $base;
+ protected $basePathStack = array();
+ public function prepare($config) {
$def = $config->getDefinition('URI');
$this->base = $def->base;
if (is_null($this->base)) {
$stack = $this->_collapseStack($stack); // do pre-parsing
$this->basePathStack = $stack;
}
- function filter(&$uri, $config, &$context) {
+ public function filter(&$uri, $config, $context) {
if (is_null($this->base)) return true; // abort early
if (
$uri->path === '' && is_null($uri->scheme) &&
is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
) {
// reference to current document
- $uri = $this->base->copy();
+ $uri = clone $this->base;
return true;
}
if (!is_null($uri->scheme)) {
/**
* Resolve dots and double-dots in a path stack
- * @private
*/
- function _collapseStack($stack) {
+ private function _collapseStack($stack) {
$result = array();
for ($i = 0; isset($stack[$i]); $i++) {
$is_folder = false;
<?php
-require_once 'HTMLPurifier/URI.php';
-
/**
* Parses a URI into the components and fragment identifier as specified
* by RFC 3986.
/**
* Instance of HTMLPurifier_PercentEncoder to do normalization with.
*/
- var $percentEncoder;
+ protected $percentEncoder;
- function HTMLPurifier_URIParser() {
+ public function __construct() {
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
}
* @return HTMLPurifier_URI representation of URI. This representation has
* not been validated yet and may not conform to RFC.
*/
- function parse($uri) {
+ public function parse($uri) {
$uri = $this->percentEncoder->normalize($uri);
/**
* Scheme's default port (integer)
- * @public
*/
- var $default_port = null;
+ public $default_port = null;
/**
* Whether or not URIs of this schem are locatable by a browser
* http and ftp are accessible, while mailto and news are not.
- * @public
*/
- var $browsable = false;
+ public $browsable = false;
/**
* Whether or not the URI always uses <hier_part>, resolves edge cases
* with making relative URIs absolute
*/
- var $hierarchical = false;
+ public $hierarchical = false;
/**
* Validates the components of a URI
* @param $context HTMLPurifier_Context object
* @return Bool success or failure
*/
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
if ($this->default_port == $uri->port) $uri->port = null;
return true;
}
<?php
-require_once 'HTMLPurifier/URIScheme.php';
-
/**
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
*/
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
- var $default_port = 21;
- var $browsable = true; // usually
- var $hierarchical = true;
+ public $default_port = 21;
+ public $browsable = true; // usually
+ public $hierarchical = true;
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
$uri->query = null;
<?php
-require_once 'HTMLPurifier/URIScheme.php';
-
/**
* Validates http (HyperText Transfer Protocol) as defined by RFC 2616
*/
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
- var $default_port = 80;
- var $browsable = true;
- var $hierarchical = true;
+ public $default_port = 80;
+ public $browsable = true;
+ public $hierarchical = true;
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
$uri->userinfo = null;
return true;
<?php
-require_once 'HTMLPurifier/URIScheme/http.php';
-
/**
* Validates https (Secure HTTP) according to http scheme.
*/
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
- var $default_port = 443;
+ public $default_port = 443;
}
<?php
-require_once 'HTMLPurifier/URIScheme.php';
-
// VERY RELAXED! Shouldn't cause problems, not even Firefox checks if the
// email is valid, but be careful!
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
- var $browsable = false;
+ public $browsable = false;
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
$uri->userinfo = null;
$uri->host = null;
<?php
-require_once 'HTMLPurifier/URIScheme.php';
-
/**
* Validates news (Usenet) as defined by generic RFC 1738
*/
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
- var $browsable = false;
+ public $browsable = false;
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
$uri->userinfo = null;
$uri->host = null;
<?php
-require_once 'HTMLPurifier/URIScheme.php';
-
/**
* Validates nntp (Network News Transfer Protocol) as defined by generic RFC 1738
*/
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
- var $default_port = 119;
- var $browsable = false;
+ public $default_port = 119;
+ public $browsable = false;
- function validate(&$uri, $config, &$context) {
+ public function validate(&$uri, $config, $context) {
parent::validate($uri, $config, $context);
$uri->userinfo = null;
$uri->query = null;
<?php
-require_once 'HTMLPurifier/URIScheme/http.php';
-require_once 'HTMLPurifier/URIScheme/https.php';
-require_once 'HTMLPurifier/URIScheme/mailto.php';
-require_once 'HTMLPurifier/URIScheme/ftp.php';
-require_once 'HTMLPurifier/URIScheme/nntp.php';
-require_once 'HTMLPurifier/URIScheme/news.php';
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'AllowedSchemes', array(
- 'http' => true, // "Hypertext Transfer Protocol", nuf' said
- 'https' => true, // HTTP over SSL (Secure Socket Layer)
- // quite useful, but not necessary
- 'mailto' => true,// Email
- 'ftp' => true, // "File Transfer Protocol"
- // for Usenet, these two are similar, but distinct
- 'nntp' => true, // individual Netnews articles
- 'news' => true // newsgroup or individual Netnews articles
- ), 'lookup',
- 'Whitelist that defines the schemes that a URI is allowed to have. This '.
- 'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'OverrideAllowedSchemes', true, 'bool',
- 'If this is set to true (which it is by default), you can override '.
- '%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
- 'to the registry. If false, you will also have to update that directive '.
- 'in order to add more schemes.'
-);
-
/**
* Registry for retrieving specific URI scheme validator objects.
*/
/**
* Retrieve sole instance of the registry.
- * @static
* @param $prototype Optional prototype to overload sole instance with,
* or bool true to reset to default registry.
* @note Pass a registry object $prototype with a compatible interface and
* the function will copy it and return it all further times.
*/
- function &instance($prototype = null) {
+ public static function instance($prototype = null) {
static $instance = null;
if ($prototype !== null) {
$instance = $prototype;
/**
* Cache of retrieved schemes.
- * @protected
*/
- var $schemes = array();
+ protected $schemes = array();
/**
* Retrieves a scheme validator object
* @param $config HTMLPurifier_Config object
* @param $config HTMLPurifier_Context object
*/
- function &getScheme($scheme, $config, &$context) {
+ public function getScheme($scheme, $config, $context) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$null = null; // for the sake of passing by reference
* @param $scheme Scheme name
* @param $scheme_obj HTMLPurifier_URIScheme object
*/
- function register($scheme, &$scheme_obj) {
- $this->schemes[$scheme] =& $scheme_obj;
+ public function register($scheme, $scheme_obj) {
+ $this->schemes[$scheme] = $scheme_obj;
}
}
--- /dev/null
+<?php
+
+/**
+ * Turns loosely-typed input into native PHP values of a requested type.
+ * This base class performs no conversion of its own: it only confirms
+ * that the value it is handed already has the requested type. Subclasses
+ * supply the actual conversion by overriding parseImplementation().
+ */
+class HTMLPurifier_VarParser
+{
+
+    /**
+     * Every type name accepted by parse(), stored as name => true.
+     */
+    public static $types = array(
+        'string'    => true,
+        'istring'   => true,
+        'text'      => true,
+        'itext'     => true,
+        'int'       => true,
+        'float'     => true,
+        'bool'      => true,
+        'lookup'    => true,
+        'list'      => true,
+        'hash'      => true,
+        'mixed'     => true
+    );
+
+    /**
+     * The subset of types that are strings, and may therefore carry
+     * aliases or allowed value lists. Stored as name => true.
+     */
+    public static $stringTypes = array(
+        'string'    => true,
+        'istring'   => true,
+        'text'      => true,
+        'itext'     => true,
+    );
+
+    /**
+     * Parses a variable and checks that the result has the requested type.
+     * Reports failures through error()/errorGeneric(), which throw
+     * HTMLPurifier_VarParserException in this class.
+     *
+     * @param $var Variable to validate
+     * @param $type Name of the expected type, see HTMLPurifier_VarParser::$types
+     * @param $allow_null Whether or not null is an acceptable result
+     * @return Validated and type-coerced variable
+     */
+    final public function parse($var, $type, $allow_null = false) {
+        if (!isset(HTMLPurifier_VarParser::$types[$type])) {
+            throw new HTMLPurifier_VarParserException("Invalid type '$type'");
+        }
+        $value = $this->parseImplementation($var, $type, $allow_null);
+        if ($allow_null && $value === null) {
+            return null;
+        }
+        // Sanity checks on whatever the implementation produced. A case
+        // that breaks out of the switch ends in the generic type error.
+        switch ($type) {
+            case 'mixed':
+                return $value;
+            case 'bool':
+                if (is_bool($value)) return $value;
+                break;
+            case 'int':
+                if (is_int($value)) return $value;
+                break;
+            case 'float':
+                if (is_float($value)) return $value;
+                break;
+            case 'string':
+            case 'istring':
+            case 'text':
+            case 'itext':
+                if (!is_string($value)) break;
+                // the "i" variants are case-insensitive, so normalize them
+                return ($type[0] == 'i') ? strtolower($value) : $value;
+            case 'hash':
+            case 'list':
+            case 'lookup':
+                if (!is_array($value)) break;
+                if ($type === 'list') {
+                    // a list must be keyed 0, 1, 2, ... without gaps
+                    $indices = array_keys($value);
+                    if ($indices !== array_keys($indices)) {
+                        $this->error('Indices for list are not uniform');
+                    }
+                } elseif ($type === 'lookup') {
+                    // every entry of a lookup table must map to boolean true
+                    foreach ($value as $flag) {
+                        if ($flag !== true) {
+                            $this->error('Lookup table contains value other than true');
+                        }
+                    }
+                }
+                return $value;
+            default:
+                $this->errorInconsistent(get_class($this), $type);
+        }
+        $this->errorGeneric($value, $type);
+    }
+
+    /**
+     * Hook that performs the real conversion. The base version hands
+     * $var back untouched; subclasses are expected to override it.
+     */
+    protected function parseImplementation($var, $type, $allow_null) {
+        return $var;
+    }
+
+    /**
+     * Raises a HTMLPurifier_VarParserException carrying $msg.
+     */
+    protected function error($msg) {
+        throw new HTMLPurifier_VarParserException($msg);
+    }
+
+    /**
+     * Raises a HTMLPurifier_Exception reporting that $class has no
+     * handling for $type.
+     * @note This should never be reached in practice. It signals that the
+     *       allowed types of HTMLPurifier_VarParser were extended without
+     *       the parsing code being updated to match.
+     */
+    protected function errorInconsistent($class, $type) {
+        throw new HTMLPurifier_Exception("Inconsistency in $class: $type not implemented");
+    }
+
+    /**
+     * Reports, through error(), that $var did not have the expected $type.
+     */
+    protected function errorGeneric($var, $type) {
+        $vtype = gettype($var);
+        $this->error("Expected type $type, got $vtype");
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Performs safe variable parsing based on types which can be used by
+ * users. This may not be able to represent all possible data inputs,
+ * however.
+ */
+class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
+{
+
+    /**
+     * Leniently converts $var toward $type.
+     *
+     * Conversions performed:
+     *  - string, istring, text, itext, mixed: returned untouched.
+     *  - int:   strings consisting solely of digits are cast to int.
+     *  - float: numeric strings and ints are cast to float.
+     *  - bool:  the ints 0/1 and the strings on/true/1 and off/false/0.
+     *  - list, lookup, hash: strings are split on commas (and on line
+     *           breaks when any are present) and every item is trimmed.
+     *           For hash, items are split into key:value pairs and items
+     *           without a colon are skipped. Lookups are normalized so
+     *           that every value is true. An empty array is accepted for
+     *           all three types.
+     *
+     * @param $var Value to convert
+     * @param $type Name of the target type
+     * @param $allow_null Whether null is passed through unchanged
+     * @return Converted value. Scalars matching no conversion rule are
+     *         returned unchanged.
+     * @throws HTMLPurifier_VarParserException on an unrecognized bool
+     *         string; array types that cannot be built are reported
+     *         through errorGeneric().
+     */
+    protected function parseImplementation($var, $type, $allow_null) {
+        if ($allow_null && $var === null) return null;
+        switch ($type) {
+            // Note: if code "breaks" from the switch, it triggers a generic
+            // exception to be thrown. Specific errors can be specifically
+            // done here.
+            case 'mixed':
+            case 'istring':
+            case 'string':
+            case 'text':
+            case 'itext':
+                return $var;
+            case 'int':
+                if (is_string($var) && ctype_digit($var)) $var = (int) $var;
+                return $var;
+            case 'float':
+                if ((is_string($var) && is_numeric($var)) || is_int($var)) $var = (float) $var;
+                return $var;
+            case 'bool':
+                if (is_int($var) && ($var === 0 || $var === 1)) {
+                    $var = (bool) $var;
+                } elseif (is_string($var)) {
+                    // loose comparison: numeric-looking strings such as
+                    // '1.0' compare equal to '1'
+                    if ($var == 'on' || $var == 'true' || $var == '1') {
+                        $var = true;
+                    } elseif ($var == 'off' || $var == 'false' || $var == '0') {
+                        $var = false;
+                    } else {
+                        throw new HTMLPurifier_VarParserException("Unrecognized value '$var' for $type");
+                    }
+                }
+                return $var;
+            case 'list':
+            case 'hash':
+            case 'lookup':
+                if (is_string($var)) {
+                    // special case: technically, this is an array with
+                    // a single empty string item, but having an empty
+                    // array is more intuitive
+                    if ($var == '') return array();
+                    if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
+                        // simplistic string to array method that only works
+                        // for simple lists of tag names or alphanumeric characters
+                        $var = explode(',',$var);
+                    } else {
+                        $var = preg_split('/(,|[\n\r]+)/', $var);
+                    }
+                    // remove spaces
+                    foreach ($var as $i => $j) $var[$i] = trim($j);
+                    if ($type === 'hash') {
+                        // key:value,key2:value2
+                        $nvar = array();
+                        foreach ($var as $keypair) {
+                            $c = explode(':', $keypair, 2);
+                            if (!isset($c[1])) continue;
+                            $nvar[$c[0]] = $c[1];
+                        }
+                        $var = $nvar;
+                    }
+                }
+                if (!is_array($var)) break;
+                // An empty array is valid for every array type. Without
+                // this guard an empty hash would pass the "sequential
+                // keys" test below, be mistaken for a list and rejected.
+                if ($var === array()) return $var;
+                $keys = array_keys($var);
+                if ($keys === array_keys($keys)) {
+                    // keys are 0..n-1, i.e. the array is a plain list
+                    if ($type == 'list') return $var;
+                    elseif ($type == 'lookup') {
+                        // turn the list of names into name => true
+                        $new = array();
+                        foreach ($var as $key) {
+                            $new[$key] = true;
+                        }
+                        return $new;
+                    } else break; // a non-empty list is not a hash
+                }
+                if ($type === 'lookup') {
+                    // keep the keys, force every value to true
+                    foreach ($var as $key => $value) {
+                        $var[$key] = true;
+                    }
+                }
+                return $var;
+            default:
+                $this->errorInconsistent(__CLASS__, $type);
+        }
+        $this->errorGeneric($var, $type);
+    }
+
+}
--- /dev/null
+<?php
+
+/**
+ * This variable parser uses PHP's internal code engine. Because it does
+ * this, it can represent all inputs; however, it is dangerous and cannot
+ * be used by users.
+ *
+ * SECURITY: every value handed to this parser is executed as PHP code
+ * via eval(). Only ever feed it trusted, developer-authored input.
+ */
+class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
+{
+
+    /**
+     * Evaluates $var as a PHP expression and returns the result. $type
+     * and $allow_null are not consulted here.
+     */
+    protected function parseImplementation($var, $type, $allow_null) {
+        return $this->evalExpression($var);
+    }
+
+    /**
+     * Evaluates a PHP expression and returns its value.
+     *
+     * @param $expr String of PHP code forming a single expression
+     * @return Value the expression evaluated to
+     * @throws HTMLPurifier_VarParserException if the code cannot be parsed
+     */
+    protected function evalExpression($expr) {
+        $var = null;
+        try {
+            $result = eval("\$var = $expr;");
+        } catch (ParseError $e) {
+            // PHP 7+ reports syntax errors in evaluated code by throwing
+            // ParseError rather than returning false. On PHP 5 this class
+            // does not exist and the clause simply never matches.
+            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
+        }
+        // PHP 5 signals a syntax error in evaluated code by returning false
+        if ($result === false) {
+            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
+        }
+        return $var;
+    }
+
+}
+
--- /dev/null
+<?php
+
+/**
+ * Exception type for HTMLPurifier_VarParser.
+ *
+ * Thrown by HTMLPurifier_VarParser and its subclasses when a type name is
+ * invalid or a value cannot be validated or converted to the requested
+ * type. Declares no members of its own.
+ */
+class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
+{
+
+}
-Description of HTML Purifier v2.1.4 Lite library import into Moodle
+Description of HTML Purifier v3.1.0 library import into Moodle
Changes:
* HMLTModule/Text.php - added <nolink>, <tex>, <lang> and <algebra> tags
* HMLTModule/XMLCommonAttributes.php - remove xml:lang - needed for multilang
* AttrDef/Lang.php - relax lang check - needed for multilang
- * AttrDef/URI/Email/SimpleCheck.php - deleted to prevent errors on some systems, not used anyway
+
+ * temporary work dir fix from http://htmlpurifier.org/phorum/read.php?2,1809,1809#msg-1809
skodak
global $CFG;
// this can not be done only once because we sometimes need to reset the cache
- $cachedir = $CFG->dataroot.'/cache/htmlpurifier/';
+ $cachedir = $CFG->dataroot.'/cache/htmlpurifier';
$status = check_dir_exists($cachedir, true, true);
static $purifier = false;
+ static $config;
if ($purifier === false) {
- require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.auto.php';
+ require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.safe-includes.php';
$config = HTMLPurifier_Config::createDefault();
- $config->set('Core', 'AcceptFullDocuments', false);
+ $config->set('Core', 'ConvertDocumentToFragment', true);
$config->set('Core', 'Encoding', 'UTF-8');
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
$config->set('Cache', 'SerializerPath', $cachedir);