set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier.php';
-?>
\ No newline at end of file
return $purifier->purify($html, $config);
}
-?>
\ No newline at end of file
*/
/*
- HTML Purifier 1.6.1 - Standards Compliant HTML Filtering
+ HTML Purifier 2.1.1 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang
This library is free software; you can redistribute it and/or
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+// constants are slow, but we'll make one exception
+define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
+
// almost every class has an undocumented dependency to these, so make sure
// they get included
-require_once 'HTMLPurifier/ConfigSchema.php';
+require_once 'HTMLPurifier/ConfigSchema.php'; // important
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
require_once 'HTMLPurifier/Strategy/Core.php';
require_once 'HTMLPurifier/Encoder.php';
+require_once 'HTMLPurifier/ErrorCollector.php';
+require_once 'HTMLPurifier/LanguageFactory.php';
+
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'CollectErrors', false, 'bool', '
+Whether or not to collect errors found while filtering the document. This
+is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
+This directive has been available since 2.0.0.
+');
+
/**
* Main library execution class.
*
class HTMLPurifier
{
- var $version = '1.6.1';
+ var $version = '2.1.1';
var $config;
var $filters;
- var $lexer, $strategy, $generator;
+ var $strategy, $generator;
/**
* Final HTMLPurifier_Context of last run purification. Might be an array.
$this->config = HTMLPurifier_Config::create($config);
- $this->lexer = HTMLPurifier_Lexer::create();
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->generator = new HTMLPurifier_Generator();
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
+ // implementation is partially environment dependent, partially
+ // configuration dependent
+ $lexer = HTMLPurifier_Lexer::create($config);
+
$context = new HTMLPurifier_Context();
+
+ // our friendly neighborhood generator, all primed with configuration too!
+ $this->generator->generateFromTokens(array(), $config, $context);
+ $context->register('Generator', $this->generator);
+
+ // set up global context variables
+ if ($config->get('Core', 'CollectErrors')) {
+ // may get moved out if other facilities use it
+ $language_factory = HTMLPurifier_LanguageFactory::instance();
+ $language = $language_factory->create($config, $context);
+ $context->register('Locale', $language);
+
+ $error_collector = new HTMLPurifier_ErrorCollector($context);
+ $context->register('ErrorCollector', $error_collector);
+ }
+
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
- $this->lexer->tokenizeHTML(
+ $lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
return $array_of_html;
}
+ /**
+  * Singleton accessor for enforcing just one HTML Purifier in your system.
+  * @param $prototype Optional. An HTMLPurifier object to install as the
+  *        shared instance, or any other non-empty value, which is handed
+  *        to the HTMLPurifier constructor to build a replacement.
+  * @return Reference to the shared HTMLPurifier instance
+  */
+ function &getInstance($prototype = null) {
+     static $htmlpurifier;
+     if ($prototype) {
+         // an explicit prototype always replaces whatever was stored
+         if (is_a($prototype, 'HTMLPurifier')) {
+             $htmlpurifier = $prototype;
+         } else {
+             $htmlpurifier = new HTMLPurifier($prototype);
+         }
+     } elseif (!$htmlpurifier) {
+         // nothing stored yet and no prototype given: build a default one
+         $htmlpurifier = new HTMLPurifier();
+     }
+     return $htmlpurifier;
+ }
+
}
-?>
\ No newline at end of file
<?php
require_once 'HTMLPurifier/AttrTypes.php';
-require_once 'HTMLPurifier/AttrDef/Lang.php';
/**
* Defines common attribute collections that modules reference
/**
* Associative array of attribute collections, indexed by name
- * @note Technically, the composition of these is more complicated,
- * but we bypass it using our own excludes property
*/
var $info = array();
* @param $modules Hash array of HTMLPurifier_HTMLModule members
*/
function HTMLPurifier_AttrCollections($attr_types, $modules) {
- $info =& $this->info;
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
+ if (!isset($this->info[$coll_i])) {
+ $this->info[$coll_i] = array();
+ }
foreach ($coll as $attr_i => $attr) {
- if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
+ if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes
- $info[$coll_i][$attr_i] = array_merge(
- $info[$coll_i][$attr_i], $attr);
+ $this->info[$coll_i][$attr_i] = array_merge(
+ $this->info[$coll_i][$attr_i], $attr);
continue;
}
- $info[$coll_i][$attr_i] = $attr;
+ $this->info[$coll_i][$attr_i] = $attr;
}
}
}
// perform internal expansions and inclusions
- foreach ($info as $name => $attr) {
+ foreach ($this->info as $name => $attr) {
// merge attribute collections that include others
- $this->performInclusions($info[$name]);
+ $this->performInclusions($this->info[$name]);
// replace string identifiers with actual attribute objects
- $this->expandIdentifiers($info[$name], $attr_types);
+ $this->expandIdentifiers($this->info[$name], $attr_types);
}
}
function performInclusions(&$attr) {
if (!isset($attr[0])) return;
$merge = $attr[0];
+ $seen = array(); // recursion guard
// loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) {
+ if (isset($seen[$merge[$i]])) continue;
+ $seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over
+ if (!isset($this->info[$merge[$i]])) continue;
foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions
$attr[$key] = $value;
}
- if (isset($info[$merge[$i]][0])) {
+ if (isset($this->info[$merge[$i]][0])) {
// recursion
- $merge = array_merge($merge, isset($info[$merge[$i]][0]));
+ $merge = array_merge($merge, $this->info[$merge[$i]][0]);
}
}
unset($attr[0]);
* @param $attr_types HTMLPurifier_AttrTypes instance
*/
function expandIdentifiers(&$attr, $attr_types) {
+
+ // because foreach will process new elements we add, make sure we
+ // skip duplicates
+ $processed = array();
+
foreach ($attr as $def_i => $def) {
+ // skip inclusions
if ($def_i === 0) continue;
- if (!is_string($def)) continue;
+
+ if (isset($processed[$def_i])) continue;
+
+ // determine whether or not attribute is required
+ if ($required = (strpos($def_i, '*') !== false)) {
+ // rename the definition
+ unset($attr[$def_i]);
+ $def_i = trim($def_i, '*');
+ $attr[$def_i] = $def;
+ }
+
+ $processed[$def_i] = true;
+
+ // if we've already got a literal object, move on
+ if (is_object($def)) {
+ // preserve previous required
+ $attr[$def_i]->required = ($required || $attr[$def_i]->required);
+ continue;
+ }
+
if ($def === false) {
unset($attr[$def_i]);
continue;
}
- if (isset($attr_types->info[$def])) {
- $attr[$def_i] = $attr_types->info[$def];
+
+ if ($t = $attr_types->get($def)) {
+ $attr[$def_i] = $t;
+ $attr[$def_i]->required = $required;
} else {
- trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
unset($attr[$def_i]);
}
}
+
}
}
-?>
\ No newline at end of file
{
/**
- * Tells us whether or not an HTML attribute is minimized. Only the
- * boolean attribute vapourware would use this.
+ * Tells us whether or not an HTML attribute is minimized. Has no
+ * meaning in other contexts.
*/
var $minimized = false;
+ /**
+ * Tells us whether or not an HTML attribute is required. Has no
+ * meaning in other contexts
+ */
+ var $required = false;
+
/**
* Validates and cleans passed string according to a definition.
*
$string = str_replace(array("\r", "\t"), ' ', $string);
return $string;
}
+
+ /**
+ * Factory method for creating this class from a string.
+ * This default implementation does not look at $string at all: it
+ * simply hands back the object it was called on.
+ * @param $string String construction info
+ * @return Created AttrDef object corresponding to $string
+ * @public
+ */
+ function make($string) {
+ // default implementation, return flyweight of this object
+ // if overloaded, it is *necessary* for you to clone the
+ // object (usually by instantiating a new copy) and return that
+ return $this;
+ }
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/AttrDef.php';
-/**
- * Validates Color as defined by CSS.
- */
-class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
-{
-
- /**
- * Color keyword lookup table.
- * @todo Extend it to include all usually allowed colors.
- */
- var $colors = array(
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'ColorKeywords', array(
'maroon' => '#800000',
- 'red' => '#F00',
+ 'red' => '#FF0000',
'orange' => '#FFA500',
- 'yellow' => '#FF0',
+ 'yellow' => '#FFFF00',
'olive' => '#808000',
'purple' => '#800080',
- 'fuchsia' => '#F0F',
- 'white' => '#FFF',
- 'lime' => '#0F0',
+ 'fuchsia' => '#FF00FF',
+ 'white' => '#FFFFFF',
+ 'lime' => '#00FF00',
'green' => '#008000',
'navy' => '#000080',
- 'blue' => '#00F',
- 'aqua' => '#0FF',
+ 'blue' => '#0000FF',
+ 'aqua' => '#00FFFF',
'teal' => '#008080',
- 'black' => '#000',
+ 'black' => '#000000',
'silver' => '#C0C0C0',
'gray' => '#808080'
- );
+ ), 'hash', '
+Lookup array of color names to six digit hexadecimal number corresponding
+to color, with preceding hash mark. Used when parsing colors.
+This directive has been available since 2.0.0.
+');
+
+/**
+ * Validates Color as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
+{
function validate($color, $config, &$context) {
+ static $colors = null;
+ if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
+
$color = trim($color);
if (!$color) return false;
$lower = strtolower($color);
- if (isset($this->colors[$lower])) return $this->colors[$lower];
+ if (isset($colors[$lower])) return $colors[$lower];
if ($color[0] === '#') {
// hexadecimal handling
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
*/
var $info = array();
- /**
- * System font keywords.
- */
- var $system_fonts = array(
- 'caption' => true,
- 'icon' => true,
- 'menu' => true,
- 'message-box' => true,
- 'small-caption' => true,
- 'status-bar' => true
- );
-
function HTMLPurifier_AttrDef_CSS_Font($config) {
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
function validate($string, $config, &$context) {
+ static $system_fonts = array(
+ 'caption' => true,
+ 'icon' => true,
+ 'menu' => true,
+ 'message-box' => true,
+ 'small-caption' => true,
+ 'status-bar' => true
+ );
+
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// check if it's one of the keywords
$lowercase_string = strtolower($string);
- if (isset($this->system_fonts[$lowercase_string])) {
+ if (isset($system_fonts[$lowercase_string])) {
return $lowercase_string;
}
}
-?>
\ No newline at end of file
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
- /**
- * Generic font family keywords.
- * @protected
- */
- var $generic_names = array(
- 'serif' => true,
- 'sans-serif' => true,
- 'monospace' => true,
- 'fantasy' => true,
- 'cursive' => true
- );
-
function validate($string, $config, &$context) {
+ static $generic_names = array(
+ 'serif' => true,
+ 'sans-serif' => true,
+ 'monospace' => true,
+ 'fantasy' => true,
+ 'cursive' => true
+ );
+
$string = $this->parseCDATA($string);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$font = trim($font);
if ($font === '') continue;
// match a generic name
- if (isset($this->generic_names[$font])) {
+ if (isset($generic_names[$font])) {
$final .= $font . ', ';
continue;
}
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
+ // double-backslash processing is buggy
+ $font = str_replace("\\$quote", $quote, $font); // de-escape quote
+ $font = str_replace("\\\n", "\n", $font); // de-escape newlines
}
- // process font
+ // $font is a pure representation of the font name
+
if (ctype_alnum($font)) {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
- $nospace = str_replace(array(' ', '.', '!'), '', $font);
- if (ctype_alnum($nospace)) {
- // font with spaces in it
- $final .= "'$font', ";
- continue;
- }
+
+ // complicated font, requires quoting
+
+ // armor single quotes and new lines
+ $font = str_replace("'", "\\'", $font);
+ $font = str_replace("\n", "\\\n", $font);
+ $final .= "'$font', ";
}
$final = rtrim($final, ', ');
if ($final === '') return false;
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
- /**
- * Lookup table of allowed values.
- * @protected
- */
- var $allowed_values = array(
- 'line-through' => true,
- 'overline' => true,
- 'underline' => true
- );
-
function validate($string, $config, &$context) {
+ static $allowed_values = array(
+ 'line-through' => true,
+ 'overline' => true,
+ 'underline' => true
+ );
+
$string = strtolower($this->parseCDATA($string));
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {
- if (isset($this->allowed_values[$part])) {
+ if (isset($allowed_values[$part])) {
$final .= $part . ' ';
}
}
}
-?>
\ No newline at end of file
{
function HTMLPurifier_AttrDef_CSS_URI() {
- $this->HTMLPurifier_AttrDef_URI(true); // always embedded
+ parent::HTMLPurifier_AttrDef_URI(true); // always embedded
}
function validate($uri_string, $config, &$context) {
if ($uri_string[$new_length] != ')') return false;
$uri = trim(substr($uri_string, 0, $new_length));
- if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) {
+ if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0];
$new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false;
}
-?>
\ No newline at end of file
return $result ? $string : false;
}
+ /**
+  * Builds an enumeration validator from its string description.
+  * @param $string In form of comma-delimited list of case-insensitive
+  *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
+  *      case sensitive
+  * @return New HTMLPurifier_AttrDef_Enum for the listed values
+  */
+ function make($string) {
+     // an "s:" marker only counts when something follows it
+     $sensitive = (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':');
+     if ($sensitive) {
+         // drop the marker, keeping only the value list
+         $string = substr($string, 2);
+     }
+     return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
+ }
+
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates a boolean attribute
+ */
+class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
+{
+
+    /** Attribute name; this is what validate() returns on success */
+    var $name;
+
+    /** Flags the attribute as a minimized one */
+    var $minimized = true;
+
+    /**
+     * @param $name Name of the attribute this definition validates
+     */
+    function HTMLPurifier_AttrDef_HTML_Bool($name = false) {
+        $this->name = $name;
+    }
+
+    /**
+     * @param $string Raw attribute value
+     * @return The attribute name, or false when the value is empty()
+     */
+    function validate($string, $config, &$context) {
+        return empty($string) ? false : $this->name;
+    }
+
+    /**
+     * @param $string Name of attribute
+     * @return New HTMLPurifier_AttrDef_HTML_Bool bound to that name
+     */
+    function make($string) {
+        $def = new HTMLPurifier_AttrDef_HTML_Bool($string);
+        return $def;
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; // for %Core.ColorKeywords
+
+/**
+ * Validates a color according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Normalizes an HTML color value to "#" followed by six hex digits.
+     * @param $string Raw attribute value: a color keyword, or a three or
+     *        six digit hexadecimal number with or without a leading "#"
+     * @param $config Configuration object; %Core.ColorKeywords is read
+     *        from it on the first call
+     * @param $context Context object (not used here)
+     * @return Normalized color string, or false if the value is invalid
+     */
+    function validate($string, $config, &$context) {
+
+        // the keyword table is fetched once and kept in a static, so it
+        // is not re-read from configurations passed in later calls
+        static $colors = null;
+        if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
+
+        $string = trim($string);
+
+        if (empty($string)) return false;
+
+        // exact match first, so custom keyword tables keep working as-is
+        if (isset($colors[$string])) return $colors[$string];
+
+        // HTML color names are case-insensitive: "Red" and "RED" must
+        // resolve just like "red" does
+        $lower = strtolower($string);
+        if (isset($colors[$lower])) return $colors[$lower];
+
+        // the leading hash mark is optional
+        if ($string[0] === '#') $hex = substr($string, 1);
+        else $hex = $string;
+
+        $length = strlen($hex);
+        if ($length !== 3 && $length !== 6) return false;
+        if (!ctype_xdigit($hex)) return false;
+        // expand shorthand (abc => aabbcc)
+        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
+
+        return "#$hex";
+
+    }
+
+}
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
- /** Lookup array of attribute names to configuration name */
- var $configLookup = array(
- 'rel' => 'AllowedRel',
- 'rev' => 'AllowedRev'
- );
-
/** Name config attribute to pull. */
var $name;
function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
- if (!isset($this->configLookup[$name])) {
+ $configLookup = array(
+ 'rel' => 'AllowedRel',
+ 'rev' => 'AllowedRev'
+ );
+ if (!isset($configLookup[$name])) {
trigger_error('Unrecognized attribute name for link '.
'relationship.', E_USER_ERROR);
return;
}
- $this->name = $this->configLookup[$name];
+ $this->name = $configLookup[$name];
}
function validate($string, $config, &$context) {
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
<?php
require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
require_once 'HTMLPurifier/PercentEncoder.php';
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DefaultScheme', 'http', 'string',
- 'Defines through what scheme the output will be served, in order to '.
- 'select the proper object validator when no scheme information is present.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Host', null, 'string/null',
- 'Defines the domain name of the server, so we can determine whether or '.
- 'an absolute URI is from your website or not. Not strictly necessary, '.
- 'as users should be using relative URIs to reference resources on your '.
- 'website. It will, however, let you use absolute URIs to link to '.
- 'subdomains of the domain you post here: i.e. example.com will allow '.
- 'sub.example.com. However, higher up domains will still be excluded: '.
- 'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
- 'This directive has been available since 1.2.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableExternal', false, 'bool',
- 'Disables links to external websites. This is a highly effective '.
- 'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
- 'links or images outside of your domain will be allowed. Non-linkified '.
- 'URIs will still be preserved. If you want to be able to link to '.
- 'subdomains or use absolute URIs, specify %URI.Host for your website. '.
- 'This directive has been available since 1.2.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableExternalResources', false, 'bool',
- 'Disables the embedding of external resources, preventing users from '.
- 'embedding things like images from other hosts. This prevents '.
- 'access tracking (good for email viewers), bandwidth leeching, '.
- 'cross-site request forging, goatse.cx posting, and '.
- 'other nasties, but also results in '.
- 'a loss of end-user functionality (they can\'t directly post a pic '.
- 'they posted from Flickr anymore). Use it if you don\'t have a '.
- 'robust user-content moderation team. This directive has been '.
- 'available since 1.3.0.'
-);
+// special case filtering directives
HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableResources', false, 'bool',
- 'Disables embedding resources, essentially meaning no pictures. You can '.
- 'still link to them though. See %URI.DisableExternalResources for why '.
- 'this might be a good idea. This directive has been available since 1.3.0.'
-);
+ 'URI', 'Munge', null, 'string/null', '
+<p>
+ Munges all browsable (usually http, https and ftp)
+ absolute URI\'s into another URI, usually a URI redirection service.
+ This directive accepts a URI, formatted with a <code>%s</code> where
+ the url-encoded original URI should be inserted (sample:
+ <code>http://www.google.com/url?q=%s</code>).
+</p>
+<p>
+ Uses for this directive:
+</p>
+<ul>
+ <li>
+ Prevent PageRank leaks, while being fairly transparent
+ to users (you may also want to add some client side JavaScript to
+ override the text in the statusbar). <strong>Notice</strong>:
+ Many security experts believe that this form of protection does not deter spam-bots.
+ </li>
+ <li>
+ Redirect users to a splash page telling them they are leaving your
+ website. While this is poor usability practice, it is often mandated
+ in corporate environments.
+ </li>
+</ul>
+<p>
+ This directive has been available since 1.3.0.
+</p>
+');
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Munge', null, 'string/null',
- 'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
- 'redirection service. Pass this directive a URI, with %s inserted where '.
- 'the url-encoded original URI should be inserted (sample: '.
- '<code>http://www.google.com/url?q=%s</code>). '.
- 'This prevents PageRank leaks, while being as transparent as possible '.
- 'to users (you may also want to add some client side JavaScript to '.
- 'override the text in the statusbar). Warning: many security experts '.
- 'believe that this form of protection does not deter spam-bots. '.
- 'You can also use this directive to redirect users to a splash page '.
- 'telling them they are leaving your website. '.
- 'This directive has been available since 1.3.0.'
-);
+// disabling directives
HTMLPurifier_ConfigSchema::define(
- 'URI', 'HostBlacklist', array(), 'list',
- 'List of strings that are forbidden in the host of any URI. Use it to '.
- 'kill domain names of spam, etc. Note that it will catch anything in '.
- 'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
- 'This directive has been available since 1.3.0.'
-);
+ 'URI', 'Disable', false, 'bool', '
+<p>
+ Disables all URIs in all forms. Not sure why you\'d want to do that
+ (after all, the Internet\'s founded on the notion of a hyperlink).
+ This directive has been available since 1.3.0.
+</p>
+');
+HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
HTMLPurifier_ConfigSchema::define(
- 'URI', 'Disable', false, 'bool',
- 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
- '(after all, the Internet\'s founded on the notion of a hyperlink). '.
- 'This directive has been available since 1.3.0.'
-);
-HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
+ 'URI', 'DisableResources', false, 'bool', '
+<p>
+ Disables embedding resources, essentially meaning no pictures. You can
+ still link to them though. See %URI.DisableExternalResources for why
+ this might be a good idea. This directive has been available since 1.3.0.
+</p>
+');
/**
* Validates a URI as defined by RFC 3986.
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
- var $host;
- var $PercentEncoder;
- var $embeds_resource;
+ var $parser, $percentEncoder;
+ var $embedsResource;
/**
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
*/
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
- $this->host = new HTMLPurifier_AttrDef_URI_Host();
- $this->PercentEncoder = new HTMLPurifier_PercentEncoder();
- $this->embeds_resource = (bool) $embeds_resource;
+ $this->parser = new HTMLPurifier_URIParser();
+ $this->percentEncoder = new HTMLPurifier_PercentEncoder();
+ $this->embedsResource = (bool) $embeds_resource;
}
function validate($uri, $config, &$context) {
- // We'll write stack-based parsers later, for now, use regexps to
- // get things working as fast as possible (irony)
-
if ($config->get('URI', 'Disable')) return false;
- // parse as CDATA
+ // initial operations
$uri = $this->parseCDATA($uri);
+ $uri = $this->percentEncoder->normalize($uri);
- // fix up percent-encoding
- $uri = $this->PercentEncoder->normalize($uri);
-
- // while it would be nice to use parse_url(), that's specifically
- // for HTTP and thus won't work for our generic URI parsing
+ // parse the URI
+ $uri = $this->parser->parse($uri);
+ if ($uri === false) return false;
- // according to the RFC... (but this cuts corners, i.e. non-validating)
- $r_URI = '!'.
- '(([^:/?#<>\'"]+):)?'. // 2. Scheme
- '(//([^/?#<>\'"]*))?'. // 4. Authority
- '([^?#<>\'"]*)'. // 5. Path
- '(\?([^#<>\'"]*))?'. // 7. Query
- '(#([^<>\'"]*))?'. // 8. Fragment
- '!';
+ // add embedded flag to context for validators
+ $context->register('EmbeddedURI', $this->embedsResource);
- $matches = array();
- $result = preg_match($r_URI, $uri, $matches);
-
- if (!$result) return false; // invalid URI
-
- // seperate out parts
- $scheme = !empty($matches[1]) ? $matches[2] : null;
- $authority = !empty($matches[3]) ? $matches[4] : null;
- $path = $matches[5]; // always present, can be empty
- $query = !empty($matches[6]) ? $matches[7] : null;
- $fragment = !empty($matches[8]) ? $matches[9] : null;
-
-
-
- $registry =& HTMLPurifier_URISchemeRegistry::instance();
- if ($scheme !== null) {
- // no need to validate the scheme's fmt since we do that when we
- // retrieve the specific scheme object from the registry
- $scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
- $scheme_obj = $registry->getScheme($scheme, $config, $context);
- if (!$scheme_obj) return false; // invalid scheme, clean it out
- } else {
- $scheme_obj = $registry->getScheme(
- $config->get('URI', 'DefaultScheme'), $config, $context
- );
- }
-
-
- // the URI we're processing embeds_resource a resource in the page, but the URI
- // it references cannot be located
- if ($this->embeds_resource && !$scheme_obj->browsable) {
- return false;
- }
-
-
- if ($authority !== null) {
+ $ok = false;
+ do {
- // remove URI if it's absolute and we disabled externals or
- // if it's absolute and embedded and we disabled external resources
- unset($our_host);
- if (
- $config->get('URI', 'DisableExternal') ||
- (
- $config->get('URI', 'DisableExternalResources') &&
- $this->embeds_resource
- )
- ) {
- $our_host = $config->get('URI', 'Host');
- if ($our_host === null) return false;
- }
+ // generic validation
+ $result = $uri->validate($config, $context);
+ if (!$result) break;
- $HEXDIG = '[A-Fa-f0-9]';
- $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
- $sub_delims = '!$&\'()'; // needs []
- $pct_encoded = "%$HEXDIG$HEXDIG";
- $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
- $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
- $matches = array();
- preg_match($r_authority, $authority, $matches);
- // overloads regexp!
- $userinfo = !empty($matches[1]) ? $matches[2] : null;
- $host = !empty($matches[3]) ? $matches[3] : null;
- $port = !empty($matches[4]) ? $matches[5] : null;
+ // chained validation
+ $uri_def =& $config->getDefinition('URI');
+ $result = $uri_def->filter($uri, $config, $context);
+ if (!$result) break;
- // validate port
- if ($port !== null) {
- $port = (int) $port;
- if ($port < 1 || $port > 65535) $port = null;
- }
-
- $host = $this->host->validate($host, $config, $context);
- if ($host === false) $host = null;
-
- if ($this->checkBlacklist($host, $config, $context)) return false;
+ // scheme-specific validation
+ $scheme_obj = $uri->getSchemeObj($config, $context);
+ if (!$scheme_obj) break;
+ if ($this->embedsResource && !$scheme_obj->browsable) break;
+ $result = $scheme_obj->validate($uri, $config, $context);
+ if (!$result) break;
- // more lenient absolute checking
- if (isset($our_host)) {
- $host_parts = array_reverse(explode('.', $host));
- // could be cached
- $our_host_parts = array_reverse(explode('.', $our_host));
- foreach ($our_host_parts as $i => $discard) {
- if (!isset($host_parts[$i])) return false;
- if ($host_parts[$i] != $our_host_parts[$i]) return false;
- }
- }
-
- // userinfo and host are validated within the regexp
+ // survived gauntlet
+ $ok = true;
- } else {
- $port = $host = $userinfo = null;
- }
-
-
- // query and fragment are quite simple in terms of definition:
- // *( pchar / "/" / "?" ), so define their validation routines
- // when we start fixing percent encoding
-
-
-
- // path gets to be validated against a hodge-podge of rules depending
- // on the status of authority and scheme, but it's not that important,
- // esp. since it won't be applicable to everyone
-
+ } while (false);
+ $context->destroy('EmbeddedURI');
+ if (!$ok) return false;
- // okay, now we defer execution to the subobject for more processing
- // note that $fragment is omitted
- list($userinfo, $host, $port, $path, $query) =
- $scheme_obj->validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context
- );
-
-
- // reconstruct authority
- $authority = null;
- if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
- $authority = '';
- if($userinfo !== null) $authority .= $userinfo . '@';
- $authority .= $host;
- if($port !== null) $authority .= ':' . $port;
+ // munge scheme off if necessary (this must be last)
+ if (!is_null($uri->scheme) && is_null($uri->host)) {
+ if ($uri_def->defaultScheme == $uri->scheme) {
+ $uri->scheme = null;
+ }
}
- // reconstruct the result
- $result = '';
- if ($scheme !== null) $result .= "$scheme:";
- if ($authority !== null) $result .= "//$authority";
- $result .= $path;
- if ($query !== null) $result .= "?$query";
- if ($fragment !== null) $result .= "#$fragment";
+ // back to string
+ $result = $uri->toString();
- // munge if necessary
- $munge = $config->get('URI', 'Munge');
- if (!empty($scheme_obj->browsable) && $munge !== null) {
- if ($authority !== null) {
- $result = str_replace('%s', rawurlencode($result), $munge);
- }
+ // munge entire URI if necessary
+ if (
+ !is_null($uri->host) && // indicator for authority
+ !empty($scheme_obj->browsable) &&
+ !is_null($munge = $config->get('URI', 'Munge'))
+ ) {
+ $result = str_replace('%s', rawurlencode($result), $munge);
}
return $result;
}
- /**
- * Checks a host against an array blacklist
- * @param $host Host to check
- * @param $config HTMLPurifier_Config instance
- * @param $context HTMLPurifier_Context instance
- * @return bool Is spam?
- */
- function checkBlacklist($host, &$config, &$context) {
- $blacklist = $config->get('URI', 'HostBlacklist');
- if (!empty($blacklist)) {
- foreach($blacklist as $blacklisted_host_fragment) {
- if (strpos($host, $blacklisted_host_fragment) !== false) {
- return true;
- }
- }
- }
- return false;
- }
-
}
-?>
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
*/
var $ip4;
- function HTMLPurifier_AttrDef_URI_IPv4() {
- $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
- $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
- }
-
function validate($aIP, $config, &$context) {
+ if (!$this->ip4) $this->_loadRegex();
+
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
}
+ /**
+  * Lazy load function: builds the dotted-quad pattern and stores it in
+  * $this->ip4, to prevent the regex from being stuffed in cache.
+  */
+ function _loadRegex() {
+     // a single octet, 0-255
+     $octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])';
+     // four octets joined by literal dots
+     $this->ip4 = '(?:' . $octet . '\\.' . $octet . '\\.' . $octet . '\\.' . $octet . ')';
+ }
+
}
-?>
\ No newline at end of file
function validate($aIP, $config, &$context) {
+ if (!$this->ip4) $this->_loadRegex();
+
$original = $aIP;
$hex = '[0-9a-fA-F]';
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
);
/**
- * Post-transform that ensures the required attrs of img (alt and src) are set
+ * Transform that supplies default values for the src and alt attributes
+ * in img tags, as well as prevents the img tag from being removed
+ * because of a missing alt tag. This needs to be registered as both
+ * a pre and post attribute transform.
*/
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
$src = true;
if (!isset($attr['src'])) {
+ if ($config->get('Core', 'RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
$src = false;
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
+++ /dev/null
-<?php
-
-require_once 'HTMLPurifier/AttrTransform.php';
-
-/**
- * Pre-transform that changes deprecated align attribute to text-align.
- */
-class HTMLPurifier_AttrTransform_TextAlign
-extends HTMLPurifier_AttrTransform {
-
- function transform($attr, $config, &$context) {
-
- if (!isset($attr['align'])) return $attr;
-
- $align = strtolower(trim($attr['align']));
- unset($attr['align']);
-
- $values = array('left' => 1,
- 'right' => 1,
- 'center' => 1,
- 'justify' => 1);
-
- if (!isset($values[$align])) {
- return $attr;
- }
-
- $attr['style'] = isset($attr['style']) ? $attr['style'] : '';
- $attr['style'] = "text-align:$align;" . $attr['style'];
-
- return $attr;
-
- }
-
-}
-
-?>
\ No newline at end of file
<?php
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+require_once 'HTMLPurifier/AttrDef/Enum.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Color.php';
require_once 'HTMLPurifier/AttrDef/Integer.php';
require_once 'HTMLPurifier/AttrDef/Text.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
{
/**
* Lookup array of attribute string identifiers to concrete implementations
- * @public
+ * @protected
*/
var $info = array();
/**
- * Constructs the info array
+ * Constructs the info array, supplying default implementations for attribute
+ * types.
*/
function HTMLPurifier_AttrTypes() {
+ // pseudo-types, must be instantiated via shorthand
+ $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
+ $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
+
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
+ $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
+ $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
// number is really a positive integer (one or more digits)
+ // FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
+
+    /**
+     * Looks up an attribute type and asks it to build a concrete AttrDef.
+     * A type may carry extra data after a '#' (for example 'Name#extra');
+     * everything after the first '#' is handed to the type's make() method.
+     * @param $type String type name, optionally followed by '#' and a
+     *              configuration string
+     * @return Object AttrDef for type; nothing if the type is undefined
+     *         (an E_USER_ERROR is triggered in that case)
+     */
+    function get($type) {
+
+        // split off the optional configuration string
+        $string = '';
+        if (strpos($type, '#') !== false) {
+            list($type, $string) = explode('#', $type, 2);
+        }
+
+        if (isset($this->info[$type])) {
+            return $this->info[$type]->make($string);
+        }
+
+        trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
+        return;
+
+    }
+
+ /**
+ * Sets a new implementation for a type. The object is stored in
+ * $this->info under the given name; any implementation already stored
+ * under that name is overwritten.
+ * @param $type String type name
+ * @param $impl Object AttrDef for type
+ */
+ function set($type, $impl) {
+ $this->info[$type] = $impl;
+ }
}
-?>
+
--- /dev/null
+<?php
+
+/**
+ * Validates the attributes of a token. Doesn't manage required attributes
+ * very well. The only reason we factored this out was because RemoveForeignElements
+ * also needed it besides ValidateAttributes.
+ */
+class HTMLPurifier_AttrValidator
+{
+
+    /**
+     * Validates the attributes of a token in place: runs the pre
+     * transforms, validates every attribute against the element's (or the
+     * global) attribute definitions, removes or substitutes values as
+     * directed by the definitions, then runs the post transforms.
+     * @param $token Reference to token to validate. We require a reference
+     *               because the operations this class performs on the token
+     *               are not atomic, so the context CurrentToken is updated
+     *               throughout
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return The untouched token if it is not a start or empty tag;
+     *         otherwise nothing, the token is modified through the reference
+     */
+    function validateToken(&$token, &$config, &$context) {
+
+        $definition = $config->getHTMLDefinition();
+        $e =& $context->get('ErrorCollector', true);
+
+        // Only start and empty tags carry attributes. Bail out before
+        // registering anything in the context: returning after the
+        // registration below would leave a stale CurrentToken behind,
+        // since the matching destroy() is at the end of this method.
+        if ($token->type !== 'start' && $token->type !== 'empty') return $token;
+
+        // initialize CurrentToken if necessary
+        $current_token =& $context->get('CurrentToken', true);
+        if (!$current_token) $context->register('CurrentToken', $token);
+
+        // create alias to global definition array, see also $defs
+        // DEFINITION CALL
+        $d_defs = $definition->info_global_attr;
+
+        // reference attributes for easy manipulation
+        $attr =& $token->attr;
+
+        // do global transformations (pre)
+        // nothing currently utilizes this
+        foreach ($definition->info_attr_transform_pre as $transform) {
+            // $o keeps the attributes as they were before the transform
+            $attr = $transform->transform($o = $attr, $config, $context);
+            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+        }
+
+        // do local transformations only applicable to this element (pre)
+        // ex. <p align="right"> to <p style="text-align:right;">
+        foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
+            $attr = $transform->transform($o = $attr, $config, $context);
+            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+        }
+
+        // create alias to this element's attribute definition array, see
+        // also $d_defs (global attribute definition array)
+        // DEFINITION CALL
+        $defs = $definition->info[$token->name]->attr;
+
+        // expose the attribute currently being validated through the context
+        $attr_key = false;
+        $context->register('CurrentAttr', $attr_key);
+
+        // iterate through all the attribute keypairs
+        // Watch out for name collisions: $key has previously been used
+        foreach ($attr as $attr_key => $value) {
+
+            // call the definition
+            if ( isset($defs[$attr_key]) ) {
+                // there is a local definition defined
+                if ($defs[$attr_key] === false) {
+                    // We've explicitly been told not to allow this attribute.
+                    // This is usually when there's a global definition
+                    // that must be overridden.
+                    // Theoretically speaking, we could have a
+                    // AttrDef_DenyAll, but this is faster!
+                    $result = false;
+                } else {
+                    // validate according to the element's definition
+                    $result = $defs[$attr_key]->validate(
+                                    $value, $config, $context
+                               );
+                }
+            } elseif ( isset($d_defs[$attr_key]) ) {
+                // there is a global definition defined, validate according
+                // to the global definition
+                $result = $d_defs[$attr_key]->validate(
+                                $value, $config, $context
+                           );
+            } else {
+                // system never heard of the attribute? DELETE!
+                $result = false;
+            }
+
+            // put the results into effect
+            if ($result === false || $result === null) {
+                // this is a generic error message that should be replaced
+                // with more specific ones when possible
+                if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
+
+                // remove the attribute
+                unset($attr[$attr_key]);
+            } elseif (is_string($result)) {
+                // generally, if a substitution is happening, there
+                // was some sort of implicit correction going on. We'll
+                // delegate it to the attribute classes to say exactly what.
+
+                // simple substitution
+                $attr[$attr_key] = $result;
+            }
+
+            // we'd also want slightly more complicated substitution
+            // involving an array as the return value,
+            // although we're not sure how colliding attributes would
+            // resolve (certain ones would be completely overridden,
+            // others would prepend themselves).
+        }
+
+        $context->destroy('CurrentAttr');
+
+        // post transforms
+
+        // global (error reporting untested)
+        foreach ($definition->info_attr_transform_post as $transform) {
+            $attr = $transform->transform($o = $attr, $config, $context);
+            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+        }
+
+        // local (error reporting untested)
+        foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
+            $attr = $transform->transform($o = $attr, $config, $context);
+            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+        }
+
+        // destroy CurrentToken if we made it ourselves
+        if (!$current_token) $context->destroy('CurrentToken');
+
+    }
+
+
+}
+
<?php
+require_once 'HTMLPurifier/Definition.php';
+
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
require_once 'HTMLPurifier/AttrDef/Enum.php';
+HTMLPurifier_ConfigSchema::define(
+ 'CSS', 'DefinitionRev', 1, 'int', '
+<p>
+ Revision identifier for your custom definition. See
+ %HTML.DefinitionRev for details. This directive has been available
+ since 2.0.0.
+</p>
+');
+
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
-class HTMLPurifier_CSSDefinition
+class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
+ var $type = 'CSS';
+
/**
* Assoc array of attribute name to definition object.
*/
/**
* Constructs the info array. The meat of this class.
*/
- function setup($config) {
+ function doSetup($config) {
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'), false);
}
-?>
\ No newline at end of file
*/
var $allow_empty;
+ /**
+ * Lookup array of all elements that this definition could possibly allow
+ */
+ var $elements = array();
+
/**
* Validates nodes according to definition and returns modification.
*
}
}
-?>
+
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
+ $this->elements = $this->block->elements;
}
function validateChildren($tokens_of_children, $config, &$context) {
}
}
-?>
\ No newline at end of file
if ($raw{0} != '(') {
$raw = "($raw)";
}
- $reg = str_replace(',', ',?', $raw);
- $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
+ $el = '[#a-zA-Z0-9_.-]+';
+ $reg = $raw;
+
+ // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
+ // DOING! Seriously: if there's problems, please report them.
+
+ // collect all elements into the $elements array
+ preg_match_all("/$el/", $reg, $matches);
+ foreach ($matches[0] as $match) {
+ $this->elements[$match] = true;
+ }
+
+ // setup all elements as parentheticals with leading commas
+ $reg = preg_replace("/$el/", '(,\\0)', $reg);
+
+ // remove commas when they were not solicited
+ $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
+
+ // remove all non-parenthetical commas: they are handled by first regex
+ $reg = preg_replace("/,\(/", '(', $reg);
+
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, &$context) {
$list_of_children .= $token->name . ',';
}
}
- $list_of_children = rtrim($list_of_children, ',');
-
+ // add leading comma to deal with stray comma declarations
+ $list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
- '/^'.$this->_pcre_regex.'$/',
+ '/^,?'.$this->_pcre_regex.'$/',
$list_of_children
);
}
}
-?>
\ No newline at end of file
}
}
-?>
\ No newline at end of file
}
}
-?>
\ No newline at end of file
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
- if (empty($i)) unset($elements[$i]);
+ if (empty($i)) unset($elements[$i]); // remove blank
}
}
$this->elements = $elements;
- $this->gen = new HTMLPurifier_Generator();
}
var $allow_empty = false;
var $type = 'required';
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
+ // generator
+ static $gen = null;
+ if ($gen === null) {
+ $gen = new HTMLPurifier_Generator();
+ }
+
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
- $this->gen->generateFromToken($token, $config)
+ $gen->generateFromToken($token, $config)
);
}
continue;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
- $this->gen->generateFromToken( $token, $config )
+ $gen->generateFromToken( $token, $config )
);
} else {
// drop silently
}
}
-?>
\ No newline at end of file
if (!$is_inline) {
if (!$depth) {
if (
- $token->type == 'text' ||
- !isset($this->elements[$token->name])
+ ($token->type == 'text' && !$token->is_whitespace) ||
+ ($token->type != 'text' && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
}
}
-?>
\ No newline at end of file
{
var $allow_empty = false;
var $type = 'table';
+ var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
+ 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;
}
}
-?>
\ No newline at end of file
<?php
+require_once 'HTMLPurifier/ConfigSchema.php';
+
+// member variables
+require_once 'HTMLPurifier/HTMLDefinition.php';
+require_once 'HTMLPurifier/CSSDefinition.php';
+require_once 'HTMLPurifier/URIDefinition.php';
+require_once 'HTMLPurifier/Doctype.php';
+require_once 'HTMLPurifier/DefinitionCacheFactory.php';
+
+// accommodations for versions earlier than 4.3.10 and 5.0.2
+// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
+if (!defined('PHP_EOL')) {
+    // Mirror what PHP itself defines for PHP_EOL: CRLF on Windows and LF
+    // on every other platform. Darwin (Mac OS X) is a Unix and uses LF;
+    // the bare CR convention belongs to classic Mac OS only.
+    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
+        define('PHP_EOL', "\r\n");
+    } else {
+        define('PHP_EOL', "\n");
+    }
+}
+
/**
* Configuration object that triggers customizable behavior.
*
class HTMLPurifier_Config
{
+ /**
+ * HTML Purifier's version
+ */
+ var $version = '2.1.1';
+
/**
* Two-level associative array of configuration directives
*/
var $def;
/**
- * Cached instance of HTMLPurifier_HTMLDefinition
+ * Indexed array of definitions
+ */
+ var $definitions;
+
+ /**
+ * Bool indicator whether or not config is finalized
*/
- var $html_definition;
+ var $finalized = false;
/**
- * Cached instance of HTMLPurifier_CSSDefinition
+ * Bool indicator whether or not to automatically finalize
+ * the object if a read operation is done
*/
- var $css_definition;
+ var $autoFinalize = true;
+
+ /**
+ * Namespace indexed array of serials for specific namespaces (see
+ * getSerial for more info).
+ */
+ var $serials = array();
+
+ /**
+ * Serial for entire configuration object
+ */
+ var $serial;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* @return Configured HTMLPurifier_Config object
*/
function create($config) {
- if (is_a($config, 'HTMLPurifier_Config')) return $config;
+ if (is_a($config, 'HTMLPurifier_Config')) {
+ // pass-through
+ return $config;
+ }
$ret = HTMLPurifier_Config::createDefault();
if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config);
* @param $key String key
*/
function get($namespace, $key, $from_alias = false) {
+ if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace][$key])) {
- trigger_error('Cannot retrieve value of undefined directive',
+ // can't add % due to SimpleTest bug
+ trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
- trigger_error('Cannot get value from aliased directive, use real name',
+ $d = $this->def->info[$namespace][$key];
+ trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
E_USER_ERROR);
return;
}
* @param $namespace String namespace
*/
function getBatch($namespace) {
+ if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace])) {
- trigger_error('Cannot retrieve undefined namespace',
+ trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
return $this->conf[$namespace];
}
+    /**
+     * Returns an md5 signature identifying the current values of one
+     * namespace. The signature is computed once and kept in
+     * $this->serials until it is reset.
+     * @note DefinitionRev is handled specially: it is dropped from the
+     *       batch before the signature is computed.
+     * @param $namespace Namespace to get serial for
+     */
+    function getBatchSerial($namespace) {
+        // reuse the stored signature when there is one
+        if (!empty($this->serials[$namespace])) {
+            return $this->serials[$namespace];
+        }
+        $directives = $this->getBatch($namespace);
+        unset($directives['DefinitionRev']);
+        $signature = md5(serialize($directives));
+        $this->serials[$namespace] = $signature;
+        return $signature;
+    }
+
+    /**
+     * Returns an md5 signature of every directive value in this
+     * configuration object. The signature is computed on first use and
+     * stored in $this->serial for later calls.
+     */
+    function getSerial() {
+        if (!empty($this->serial)) {
+            return $this->serial;
+        }
+        $snapshot = $this->getAll();
+        $this->serial = md5(serialize($snapshot));
+        return $this->serial;
+    }
+
+ /**
+ * Retrieves all directives, organized by namespace. Reading the values
+ * finalizes the configuration first when auto-finalization is enabled
+ * and the object has not been finalized yet.
+ * @return The internal $this->conf array
+ */
+ function getAll() {
+ if (!$this->finalized && $this->autoFinalize) $this->finalize();
+ return $this->conf;
+ }
+
/**
* Sets a value to configuration.
* @param $namespace String namespace
* @param $value Mixed value
*/
function set($namespace, $key, $value, $from_alias = false) {
+ if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$namespace][$key])) {
- trigger_error('Cannot set undefined directive to value',
+ trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
if ($from_alias) {
trigger_error('Double-aliases not allowed, please fix '.
- 'ConfigSchema bug');
+ 'ConfigSchema bug with' . "$namespace.$key");
}
$this->set($this->def->info[$namespace][$key]->namespace,
$this->def->info[$namespace][$key]->name,
}
$value = $this->def->validate(
$value,
- $this->def->info[$namespace][$key]->type,
+ $type = $this->def->info[$namespace][$key]->type,
$this->def->info[$namespace][$key]->allow_null
);
if (is_string($value)) {
if ($this->def->info[$namespace][$key]->allowed !== true) {
// check to see if the value is allowed
if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
- trigger_error('Value not supported', E_USER_WARNING);
+ trigger_error('Value not supported, valid values are: ' .
+ $this->_listify($this->def->info[$namespace][$key]->allowed), E_USER_WARNING);
return;
}
}
}
if ($this->def->isError($value)) {
- trigger_error('Value is of invalid type', E_USER_WARNING);
+ trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . $type, E_USER_WARNING);
return;
}
$this->conf[$namespace][$key] = $value;
- if ($namespace == 'HTML' || $namespace == 'Attr') {
- // reset HTML definition if relevant attributes changed
- $this->html_definition = null;
- }
- if ($namespace == 'CSS') {
- $this->css_definition = null;
+
+ // reset definitions if the directives they depend on changed
+ // this is a very costly process, so it's discouraged
+ // with finalization
+ if ($namespace == 'HTML' || $namespace == 'CSS') {
+ $this->definitions[$namespace] = null;
}
+
+ $this->serials[$namespace] = false;
+ }
+
+    /**
+     * Convenience function for error reporting: renders the keys of a
+     * lookup array as a comma-separated list.
+     * @param $lookup Array whose keys are the names to list
+     * @private
+     */
+    function _listify($lookup) {
+        return implode(', ', array_keys($lookup));
     }
/**
* called before it's been setup, otherwise won't work.
*/
function &getHTMLDefinition($raw = false) {
- if (
- empty($this->html_definition) || // hasn't ever been setup
- ($raw && $this->html_definition->setup) // requesting new one
- ) {
- $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
- if ($raw) return $this->html_definition; // no setup!
- }
- if (!$this->html_definition->setup) $this->html_definition->setup();
- return $this->html_definition;
+ $def =& $this->getDefinition('HTML', $raw);
+ return $def; // prevent PHP 4.4.0 from complaining
}
/**
* Retrieves reference to the CSS definition
*/
- function &getCSSDefinition() {
- if ($this->css_definition === null) {
- $this->css_definition = new HTMLPurifier_CSSDefinition();
- $this->css_definition->setup($this);
+ function &getCSSDefinition($raw = false) {
+ $def =& $this->getDefinition('CSS', $raw);
+ return $def;
+ }
+
+ /**
+ * Retrieves a definition. For a non-raw request the definition held in
+ * $this->definitions is used first (set up and written to the cache if it
+ * was not set up yet), then the definition cache; otherwise a new object
+ * is created, set up and cached. For a raw request an in-memory
+ * definition that has not been set up is returned as is; otherwise a new,
+ * un-setup object is created, which requires %$type.DefinitionID to be set.
+ * @param $type Type of definition: HTML, CSS, etc
+ * @param $raw Whether or not definition should be returned raw
+ * @return Reference to the definition object; false (after triggering an
+ *         error) for an unsupported type; an HTMLPurifier_Error object
+ *         (after triggering E_USER_ERROR) when raw is requested without
+ *         a DefinitionID
+ */
+ function &getDefinition($type, $raw = false) {
+ if (!$this->finalized && $this->autoFinalize) $this->finalize();
+ $factory = HTMLPurifier_DefinitionCacheFactory::instance();
+ $cache = $factory->create($type, $this);
+ if (!$raw) {
+ // see if we can quickly supply a definition
+ if (!empty($this->definitions[$type])) {
+ if (!$this->definitions[$type]->setup) {
+ $this->definitions[$type]->setup($this);
+ $cache->set($this->definitions[$type], $this);
+ }
+ return $this->definitions[$type];
+ }
+ // memory check missed, try cache
+ $this->definitions[$type] = $cache->get($this);
+ if ($this->definitions[$type]) {
+ // definition in cache, return it
+ return $this->definitions[$type];
+ }
+ } elseif (
+ !empty($this->definitions[$type]) &&
+ !$this->definitions[$type]->setup
+ ) {
+ // raw requested, raw in memory, quick return
+ return $this->definitions[$type];
+ }
+ // quick checks failed, let's create the object
+ if ($type == 'HTML') {
+ $this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
+ } elseif ($type == 'CSS') {
+ $this->definitions[$type] = new HTMLPurifier_CSSDefinition();
+ } elseif ($type == 'URI') {
+ $this->definitions[$type] = new HTMLPurifier_URIDefinition();
+ } else {
+ trigger_error("Definition of $type type not supported");
+ $false = false;
+ return $false;
+ }
+ // quick abort if raw
+ if ($raw) {
+ if (is_null($this->get($type, 'DefinitionID'))) {
+ // fatally error out if definition ID not set
+ trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
+ $false = new HTMLPurifier_Error();
+ return $false;
+ }
+ return $this->definitions[$type];
}
- return $this->css_definition;
+ // set it up
+ $this->definitions[$type]->setup($this);
+ // save in cache
+ $cache->set($this->definitions[$type], $this);
+ return $this->definitions[$type];
}
/**
* @param $config_array Configuration associative array
*/
function loadArray($config_array) {
+ if ($this->isFinalized('Cannot load directives after finalization')) return;
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
}
}
+    /**
+     * Lists, as array(namespace, directive) pairs, every schema directive
+     * that may appear in a web form. Aliases and the DefinitionID /
+     * DefinitionRev directives are always left out.
+     * @param $allowed true to allow everything, or a string / list of
+     *                 entries: 'Namespace' allows a whole namespace,
+     *                 'Namespace.Directive' allows one directive and
+     *                 '-Namespace.Directive' excludes one directive
+     * @static
+     */
+    function getAllowedDirectivesForForm($allowed) {
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $restricted = ($allowed !== true);
+        if ($restricted) {
+            if (is_string($allowed)) $allowed = array($allowed);
+            $allowed_ns = array();
+            $allowed_directives = array();
+            $blacklisted_directives = array();
+            foreach ($allowed as $entry) {
+                if (strpos($entry, '.') === false) {
+                    // no dot: the entry names a whole namespace
+                    $allowed_ns[$entry] = true;
+                } elseif ($entry[0] == '-') {
+                    // leading minus: explicitly excluded directive
+                    $blacklisted_directives[substr($entry, 1)] = true;
+                } else {
+                    $allowed_directives[$entry] = true;
+                }
+            }
+        }
+        $ret = array();
+        foreach ($schema->info as $ns => $keypairs) {
+            foreach ($keypairs as $directive => $def) {
+                if ($restricted) {
+                    $full_name = "$ns.$directive";
+                    if (isset($blacklisted_directives[$full_name])) continue;
+                    if (!isset($allowed_directives[$full_name]) && !isset($allowed_ns[$ns])) continue;
+                }
+                if ($def->class == 'alias') continue;
+                if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
+                $ret[] = array($ns, $directive);
+            }
+        }
+        return $ret;
+    }
+
+    /**
+     * Creates a configuration object from $_GET/$_POST values that were
+     * posted via ConfigForm.
+     * @param $array $_GET or $_POST array to import
+     * @param $index Index/name that the config variables are in
+     * @param $allowed List of allowed namespaces/directives
+     * @param $mq_fix Boolean whether or not to enable magic quotes fix
+     * @return Configured HTMLPurifier_Config object
+     * @static
+     */
+    function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
+        return HTMLPurifier_Config::create(
+            HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix)
+        );
+    }
+
+    /**
+     * Loads configuration values from $_GET/$_POST into this object,
+     * on top of the values it already holds. NOT STATIC.
+     * @note Same parameters as loadArrayFromForm
+     */
+    function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
+        $this->loadArray(
+            HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix)
+        );
+    }
+
+    /**
+     * Turns raw form input into a namespace => directive => value array
+     * suitable for the stricter parts of HTMLPurifier_Config. Only
+     * directives permitted by $allowed are copied; a non-empty
+     * "Null_Namespace.Directive" field sets that directive to null.
+     * @param $array $_GET or $_POST array to import
+     * @param $index Index/name that the config variables are in
+     * @param $allowed List of allowed namespaces/directives
+     * @param $mq_fix Boolean whether or not to enable magic quotes fix
+     * @static
+     */
+    function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
+        // narrow the input down to the sub-array that holds the form fields
+        if (isset($array[$index]) && is_array($array[$index])) {
+            $array = $array[$index];
+        } else {
+            $array = array();
+        }
+        $mq = get_magic_quotes_gpc() && $mq_fix;
+
+        $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
+        $ret = array();
+        foreach ($allowed as $pair) {
+            list($ns, $directive) = $pair;
+            $skey = "$ns.$directive";
+            if (!empty($array["Null_$skey"])) {
+                // the explicit null marker wins over any submitted value
+                $ret[$ns][$directive] = null;
+            } elseif (isset($array[$skey])) {
+                $ret[$ns][$directive] = $mq ? stripslashes($array[$skey]) : $array[$skey];
+            }
+        }
+        return $ret;
+    }
+
/**
* Loads configuration values from an ini file
* @param $filename Name of ini file
*/
function loadIni($filename) {
+ if ($this->isFinalized('Cannot load directives after finalization')) return;
$array = parse_ini_file($filename, true);
$this->loadArray($array);
}
+ /**
+ * Checks whether or not the configuration object is finalized. When it
+ * is finalized and an error message was supplied, that message is
+ * triggered as an E_USER_ERROR before the state is returned.
+ * @param $error String error message, or false for no error
+ * @return Bool value of $this->finalized
+ */
+ function isFinalized($error = false) {
+ if ($this->finalized && $error) {
+ trigger_error($error, E_USER_ERROR);
+ }
+ return $this->finalized;
+ }
+
+ /**
+ * Finalizes configuration only if auto finalize is on and not
+ * already finalized. Note that this method shares its name with the
+ * $autoFinalize member variable that it checks.
+ */
+ function autoFinalize() {
+ if (!$this->finalized && $this->autoFinalize) $this->finalize();
+ }
+
+ /**
+ * Finalizes a configuration object, prohibiting further change.
+ * Only sets the $finalized flag; the methods that modify the
+ * configuration check it through isFinalized().
+ */
+ function finalize() {
+ $this->finalized = true;
+ }
+
}
-?>
+
var $class = false;
}
-?>
\ No newline at end of file
*/
var $aliases = array();
+ /**
+ * Advisory list of directive aliases, i.e. other directives that
+ * redirect here
+ */
+ var $directiveAliases = array();
+
/**
* Adds a description to the array
*/
}
-?>
\ No newline at end of file
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/ConfigDef/Directive.php';
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
+if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
+
/**
* Configuration definition, defines directives and their defaults.
+ * @note If you update this, please update Printer_ConfigForm
* @todo The ability to define things multiple times is confusing and should
* be factored out to its own function named registerDependency() or
* addNote(), where only the namespace.name and an extra descriptions
var $types = array(
'string' => 'String',
'istring' => 'Case-insensitive string',
+ 'text' => 'Text',
+ 'itext' => 'Case-insensitive text',
'int' => 'Integer',
'float' => 'Float',
'bool' => 'Boolean',
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
+ $this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
+ $this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
+ $this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
+ $this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
}
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation
*/
- function define(
- $namespace, $name, $default, $type,
- $description
- ) {
+ function define($namespace, $name, $default, $type, $description) {
$def =& HTMLPurifier_ConfigSchema::instance();
- if (!isset($def->info[$namespace])) {
- trigger_error('Cannot define directive for undefined namespace',
- E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($name)) {
- trigger_error('Directive name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (empty($description)) {
- trigger_error('Description must be non-empty',
- E_USER_ERROR);
- return;
+
+ // basic sanity checks
+ if (HTMLPURIFIER_SCHEMA_STRICT) {
+ if (!isset($def->info[$namespace])) {
+ trigger_error('Cannot define directive for undefined namespace',
+ E_USER_ERROR);
+ return;
+ }
+ if (!ctype_alnum($name)) {
+ trigger_error('Directive name must be alphanumeric',
+ E_USER_ERROR);
+ return;
+ }
+ if (empty($description)) {
+ trigger_error('Description must be non-empty',
+ E_USER_ERROR);
+ return;
+ }
}
+
if (isset($def->info[$namespace][$name])) {
+ // already defined
if (
$def->info[$namespace][$name]->type !== $type ||
$def->defaults[$namespace][$name] !== $default
return;
}
} else {
- // process modifiers
+ // needs defining
+
+ // process modifiers (OPTIMIZE!)
$type_values = explode('/', $type, 2);
$type = $type_values[0];
$modifier = isset($type_values[1]) ? $type_values[1] : false;
$allow_null = ($modifier === 'null');
- if (!isset($def->types[$type])) {
- trigger_error('Invalid type for configuration directive',
- E_USER_ERROR);
- return;
- }
- $default = $def->validate($default, $type, $allow_null);
- if ($def->isError($default)) {
- trigger_error('Default value does not match directive type',
- E_USER_ERROR);
- return;
+ if (HTMLPURIFIER_SCHEMA_STRICT) {
+ if (!isset($def->types[$type])) {
+ trigger_error('Invalid type for configuration directive',
+ E_USER_ERROR);
+ return;
+ }
+ $default = $def->validate($default, $type, $allow_null);
+ if ($def->isError($default)) {
+ trigger_error('Default value does not match directive type',
+ E_USER_ERROR);
+ return;
+ }
}
+
$def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_Directive();
$def->info[$namespace][$name]->type = $type;
$def->info[$namespace][$name]->allow_null = $allow_null;
$def->defaults[$namespace][$name] = $default;
}
+ if (!HTMLPURIFIER_SCHEMA_STRICT) return;
$backtrace = debug_backtrace();
$file = $def->mungeFilename($backtrace[0]['file']);
$line = $backtrace[0]['line'];
*/
function defineNamespace($namespace, $description) {
$def =& HTMLPurifier_ConfigSchema::instance();
- if (isset($def->info[$namespace])) {
- trigger_error('Cannot redefine namespace', E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($namespace)) {
- trigger_error('Namespace name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (empty($description)) {
- trigger_error('Description must be non-empty',
- E_USER_ERROR);
- return;
+ if (HTMLPURIFIER_SCHEMA_STRICT) {
+ if (isset($def->info[$namespace])) {
+ trigger_error('Cannot redefine namespace', E_USER_ERROR);
+ return;
+ }
+ if (!ctype_alnum($namespace)) {
+ trigger_error('Namespace name must be alphanumeric',
+ E_USER_ERROR);
+ return;
+ }
+ if (empty($description)) {
+ trigger_error('Description must be non-empty',
+ E_USER_ERROR);
+ return;
+ }
}
$def->info[$namespace] = array();
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
*/
function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigSchema::instance();
- if (!isset($def->info[$namespace][$name])) {
+ if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive',
E_USER_ERROR);
return;
}
foreach ($aliases as $alias => $real) {
- if (!$def->info[$namespace][$name] !== true &&
- !isset($def->info[$namespace][$name]->allowed[$real])
- ) {
- trigger_error('Cannot define alias to value that is not allowed',
- E_USER_ERROR);
- return;
- }
- if (isset($def->info[$namespace][$name]->allowed[$alias])) {
- trigger_error('Cannot define alias over allowed value',
- E_USER_ERROR);
- return;
+ if (HTMLPURIFIER_SCHEMA_STRICT) {
+ if (!$def->info[$namespace][$name] !== true &&
+ !isset($def->info[$namespace][$name]->allowed[$real])
+ ) {
+ trigger_error('Cannot define alias to value that is not allowed',
+ E_USER_ERROR);
+ return;
+ }
+ if (isset($def->info[$namespace][$name]->allowed[$alias])) {
+ trigger_error('Cannot define alias over allowed value',
+ E_USER_ERROR);
+ return;
+ }
}
$def->info[$namespace][$name]->aliases[$alias] = $real;
}
*/
function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigSchema::instance();
- if (!isset($def->info[$namespace][$name])) {
+ if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive',
E_USER_ERROR);
return;
}
$directive =& $def->info[$namespace][$name];
$type = $directive->type;
- if ($type != 'string' && $type != 'istring') {
+ if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
trigger_error('Cannot define allowed values for directive whose type is not string',
E_USER_ERROR);
return;
foreach ($allowed_values as $value) {
$directive->allowed[$value] = true;
}
- if ($def->defaults[$namespace][$name] !== null &&
- !isset($directive->allowed[$def->defaults[$namespace][$name]])) {
+ if (
+ HTMLPURIFIER_SCHEMA_STRICT &&
+ $def->defaults[$namespace][$name] !== null &&
+ !isset($directive->allowed[$def->defaults[$namespace][$name]])
+ ) {
trigger_error('Default value must be in allowed range of variables',
E_USER_ERROR);
$directive->allowed = true; // undo undo!
*/
function defineAlias($namespace, $name, $new_namespace, $new_name) {
$def =& HTMLPurifier_ConfigSchema::instance();
- if (!isset($def->info[$namespace])) {
- trigger_error('Cannot define directive alias in undefined namespace',
- E_USER_ERROR);
- return;
- }
- if (!ctype_alnum($name)) {
- trigger_error('Directive name must be alphanumeric',
- E_USER_ERROR);
- return;
- }
- if (isset($def->info[$namespace][$name])) {
- trigger_error('Cannot define alias over directive',
- E_USER_ERROR);
- return;
- }
- if (!isset($def->info[$new_namespace][$new_name])) {
- trigger_error('Cannot define alias to undefined directive',
- E_USER_ERROR);
- return;
- }
- if ($def->info[$new_namespace][$new_name]->class == 'alias') {
- trigger_error('Cannot define alias to alias',
- E_USER_ERROR);
- return;
+ if (HTMLPURIFIER_SCHEMA_STRICT) {
+ if (!isset($def->info[$namespace])) {
+ trigger_error('Cannot define directive alias in undefined namespace',
+ E_USER_ERROR);
+ return;
+ }
+ if (!ctype_alnum($name)) {
+ trigger_error('Directive name must be alphanumeric',
+ E_USER_ERROR);
+ return;
+ }
+ if (isset($def->info[$namespace][$name])) {
+ trigger_error('Cannot define alias over directive',
+ E_USER_ERROR);
+ return;
+ }
+ if (!isset($def->info[$new_namespace][$new_name])) {
+ trigger_error('Cannot define alias to undefined directive',
+ E_USER_ERROR);
+ return;
+ }
+ if ($def->info[$new_namespace][$new_name]->class == 'alias') {
+ trigger_error('Cannot define alias to alias',
+ E_USER_ERROR);
+ return;
+ }
}
$def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_DirectiveAlias(
$new_namespace, $new_name);
+ $def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name";
}
/**
if ($allow_null && $var === null) return null;
switch ($type) {
case 'mixed':
+ //if (is_string($var)) $var = unserialize($var);
return $var;
case 'istring':
case 'string':
+ case 'text': // no difference, just is longer/multiple line string
+ case 'itext':
if (!is_string($var)) break;
- if ($type === 'istring') $var = strtolower($var);
+ if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
return $var;
case 'int':
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
// a single empty string item, but having an empty
// array is more intuitive
if ($var == '') return array();
- // simplistic string to array method that only works
- // for simple lists of tag names or alphanumeric characters
- $var = explode(',',$var);
+ if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
+ // simplistic string to array method that only works
+ // for simple lists of tag names or alphanumeric characters
+ $var = explode(',',$var);
+ } else {
+ $var = preg_split('/(,|[\n\r]+)/', $var);
+ }
// remove spaces
foreach ($var as $i => $j) $var[$i] = trim($j);
+ if ($type === 'hash') {
+ // key:value,key2:value2
+ $nvar = array();
+ foreach ($var as $keypair) {
+ $c = explode(':', $keypair, 2);
+ if (!isset($c[1])) continue;
+ $nvar[$c[0]] = $c[1];
+ }
+ $var = $nvar;
+ }
}
if (!is_array($var)) break;
$keys = array_keys($var);
* Takes an absolute path and munges it into a more manageable relative path
*/
function mungeFilename($filename) {
+ if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
$offset = strrpos($filename, 'HTMLPurifier');
$filename = substr($filename, $offset);
$filename = str_replace('\\', '/', $filename);
}
}
-?>
+
require_once 'HTMLPurifier/ChildDef/Empty.php';
require_once 'HTMLPurifier/ChildDef/Required.php';
require_once 'HTMLPurifier/ChildDef/Optional.php';
+require_once 'HTMLPurifier/ChildDef/Custom.php';
+
+// NOT UNIT TESTED!!!
class HTMLPurifier_ContentSets
{
}
-?>
\ No newline at end of file
/**
* Registry object that contains information about the current context.
+ * @warning Is a bit buggy when variables are set to null: it thinks
+ * they don't exist! So use false instead, please.
*/
class HTMLPurifier_Context
{
*/
function register($name, &$ref) {
if (isset($this->_storage[$name])) {
- trigger_error('Name collision, cannot re-register',
+ trigger_error("Name $name produces collision, cannot re-register",
E_USER_ERROR);
return;
}
/**
* Retrieves a variable reference from the context.
* @param $name String name
+ * @param $ignore_error Boolean whether or not to ignore error
*/
- function &get($name) {
+ function &get($name, $ignore_error = false) {
if (!isset($this->_storage[$name])) {
- trigger_error('Attempted to retrieve non-existent variable',
- E_USER_ERROR);
+ if (!$ignore_error) {
+ trigger_error("Attempted to retrieve non-existent variable $name",
+ E_USER_ERROR);
+ }
$var = null; // so we can return by reference
return $var;
}
*/
function destroy($name) {
if (!isset($this->_storage[$name])) {
- trigger_error('Attempted to destroy non-existent variable',
+ trigger_error("Attempted to destroy non-existent variable $name",
E_USER_ERROR);
return;
}
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * Base class for definition data objects. It records whether the
+ * one-time setup step has already run and hands the actual work to
+ * doSetup(), which concrete definitions are expected to override.
+ */
+class HTMLPurifier_Definition
+{
+
+    /**
+     * Flag: becomes true once setup() has been invoked on this object.
+     */
+    var $setup = false;
+
+    /**
+     * Identifier for the kind of definition this object represents.
+     */
+    var $type;
+
+    /**
+     * Builds the definition into its final form; this is the work the
+     * constructor leaves undone. The base implementation only raises
+     * an E_USER_ERROR, so subclasses must provide their own version.
+     * @param $config HTMLPurifier_Config instance
+     */
+    function doSetup($config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Runs doSetup() at most once per object; later calls do nothing.
+     * @param $config HTMLPurifier_Config instance
+     */
+    function setup($config)
+    {
+        if (!$this->setup) {
+            // the flag is raised before delegating to doSetup()
+            $this->setup = true;
+            $this->doSetup($config);
+        }
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
+require_once 'HTMLPurifier/DefinitionCache/Null.php';
+
+require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
+require_once 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
+require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
+
+/**
+ * Abstract base for definition cache managers. Supplies the key
+ * handling and type checking shared by all implementations; the
+ * storage operations themselves only raise an error here and must be
+ * overridden.
+ * @todo Get some sort of versioning variable so the library can easily
+ *       invalidate the cache with a new version
+ * @todo Make the test runner cache aware and allow the user to easily
+ *       flush the cache
+ * @todo Create a separate maintenance file advanced users can use to
+ *       cache their custom HTMLDefinition, which can be loaded
+ *       via a configuration directive
+ * @todo Implement memcached
+ */
+class HTMLPurifier_DefinitionCache
+{
+
+    /**
+     * Type of definition objects this cache instance handles.
+     */
+    var $type;
+
+    /**
+     * @param $type Type of definition objects this instance of the
+     *              cache will handle.
+     */
+    function HTMLPurifier_DefinitionCache($type)
+    {
+        $this->type = $type;
+    }
+
+    /**
+     * Builds the cache key for a configuration: library version, batch
+     * serial for this cache's type and the definition revision, joined
+     * by dashes.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function generateKey($config)
+    {
+        $pieces = array(
+            $config->version, // possibly replace with function calls
+            $config->getBatchSerial($this->type),
+            $config->get($this->type, 'DefinitionRev')
+        );
+        return implode('-', $pieces);
+    }
+
+    /**
+     * Decides whether a key is stale with respect to the configuration's
+     * version and revision number.
+     * @param $key Key to test
+     * @param $config Instance of HTMLPurifier_Config to test against
+     * @return Boolean true if the key is old
+     */
+    function isOld($key, $config)
+    {
+        $segments = explode('-', $key, 3);
+        // fewer than two dashes: not a key in our format, treat as old
+        if (count($segments) < 3) return true;
+        $version  = $segments[0];
+        $hash     = $segments[1];
+        $revision = $segments[2];
+        // any version mismatch is always old
+        if (version_compare($version, $config->version) != 0) return true;
+        // same version: old only when the serial matches and the
+        // stored revision is lower than the configured one
+        $same_serial = ($hash == $config->getBatchSerial($this->type));
+        return $same_serial &&
+               $revision < $config->get($this->type, 'DefinitionRev');
+    }
+
+    /**
+     * Checks that a definition's type agrees with the cache's type.
+     * @note Triggers an error on mismatch
+     * @param $def Definition object to check
+     * @return Boolean true if good, false if not
+     */
+    function checkDefType($def)
+    {
+        if ($def->type === $this->type) return true;
+        trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
+        return false;
+    }
+
+    /**
+     * Adds a definition object to the cache (abstract)
+     */
+    function add($def, $config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Unconditionally saves a definition object to the cache (abstract)
+     */
+    function set($def, $config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Replaces an object in the cache (abstract)
+     */
+    function replace($def, $config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Retrieves a definition object from the cache (abstract)
+     */
+    function get($config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Removes a definition object from the cache (abstract)
+     */
+    function remove($config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Clears all objects from cache (abstract)
+     */
+    function flush($config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+
+    /**
+     * Clears all expired (older version or revision) objects from
+     * cache (abstract)
+     */
+    function cleanup($config)
+    {
+        trigger_error('Cannot call abstract method', E_USER_ERROR);
+    }
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache.php';
+
+/**
+ * Base decorator for definition caches. Holds a wrapped cache object
+ * and forwards every cache operation to it; subclasses override the
+ * operations they want to augment.
+ */
+class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
+{
+
+    /**
+     * Cache object we are decorating
+     */
+    var $cache;
+
+    /**
+     * Argument-less constructor; the type is adopted from the wrapped
+     * cache inside decorate() rather than passed in here.
+     */
+    function HTMLPurifier_DefinitionCache_Decorator() {}
+
+    /**
+     * Lazy decorator function: produces a fresh decorator (via copy())
+     * that wraps the given cache and takes over its type.
+     * @param $cache Reference to cache object to decorate
+     * @return New decorator instance wrapping $cache
+     */
+    function decorate(&$cache) {
+        $decorator = $this->copy();
+        // reference is necessary for mocks in PHP 4
+        $decorator->cache =& $cache;
+        $decorator->type = $cache->type;
+        return $decorator;
+    }
+
+    /**
+     * Cross-compatible clone substitute; subclasses return an instance
+     * of their own class.
+     */
+    function copy() {
+        return new HTMLPurifier_DefinitionCache_Decorator();
+    }
+
+    function add($def, $config) {
+        return $this->cache->add($def, $config);
+    }
+
+    function set($def, $config) {
+        return $this->cache->set($def, $config);
+    }
+
+    function replace($def, $config) {
+        return $this->cache->replace($def, $config);
+    }
+
+    function get($config) {
+        return $this->cache->get($config);
+    }
+
+    /**
+     * Forwards removal to the wrapped cache. Without this delegate the
+     * call would land on the abstract base method and raise an error.
+     */
+    function remove($config) {
+        return $this->cache->remove($config);
+    }
+
+    function flush($config) {
+        return $this->cache->flush($config);
+    }
+
+    function cleanup($config) {
+        return $this->cache->cleanup($config);
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
+
+/**
+ * Definition cache decorator that triggers a cleanup of the cache
+ * whenever a decorated operation comes back with a falsy result
+ * (for example a cache miss on get()).
+ */
+class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends
+      HTMLPurifier_DefinitionCache_Decorator
+{
+
+    /**
+     * Name under which this decorator is registered.
+     */
+    var $name = 'Cleanup';
+
+    /**
+     * Clone substitute returning a new Cleanup decorator.
+     */
+    function copy()
+    {
+        return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
+    }
+
+    /**
+     * Adds a definition; runs cleanup when the add did not succeed.
+     */
+    function add($def, $config)
+    {
+        $result = parent::add($def, $config);
+        if ($result) return $result;
+        parent::cleanup($config);
+        return $result;
+    }
+
+    /**
+     * Saves a definition; runs cleanup when the save did not succeed.
+     */
+    function set($def, $config)
+    {
+        $result = parent::set($def, $config);
+        if ($result) return $result;
+        parent::cleanup($config);
+        return $result;
+    }
+
+    /**
+     * Replaces a definition; runs cleanup when nothing was replaced.
+     */
+    function replace($def, $config)
+    {
+        $result = parent::replace($def, $config);
+        if ($result) return $result;
+        parent::cleanup($config);
+        return $result;
+    }
+
+    /**
+     * Fetches a definition; runs cleanup when the fetch yields a falsy
+     * value.
+     */
+    function get($config)
+    {
+        $found = parent::get($config);
+        if ($found) return $found;
+        parent::cleanup($config);
+        return $found;
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
+
+/**
+ * Definition cache decorator that keeps every definition it stores or
+ * retrieves in a PHP array, keyed by the generated cache key; good for
+ * unit tests or situations with many configuration objects around.
+ */
+class HTMLPurifier_DefinitionCache_Decorator_Memory extends
+      HTMLPurifier_DefinitionCache_Decorator
+{
+
+    /**
+     * In-memory map of cache key to definition.
+     */
+    var $definitions;
+
+    /**
+     * Name under which this decorator is registered.
+     */
+    var $name = 'Memory';
+
+    /**
+     * Clone substitute returning a new Memory decorator.
+     */
+    function copy()
+    {
+        return new HTMLPurifier_DefinitionCache_Decorator_Memory();
+    }
+
+    /**
+     * Adds a definition and, on success, remembers it in memory.
+     */
+    function add($def, $config)
+    {
+        $outcome = parent::add($def, $config);
+        if (!$outcome) return $outcome;
+        $this->definitions[$this->generateKey($config)] = $def;
+        return $outcome;
+    }
+
+    /**
+     * Saves a definition and, on success, remembers it in memory.
+     */
+    function set($def, $config)
+    {
+        $outcome = parent::set($def, $config);
+        if (!$outcome) return $outcome;
+        $this->definitions[$this->generateKey($config)] = $def;
+        return $outcome;
+    }
+
+    /**
+     * Replaces a definition and, on success, remembers it in memory.
+     */
+    function replace($def, $config)
+    {
+        $outcome = parent::replace($def, $config);
+        if (!$outcome) return $outcome;
+        $this->definitions[$this->generateKey($config)] = $def;
+        return $outcome;
+    }
+
+    /**
+     * Returns the remembered entry for this configuration's key,
+     * asking the wrapped cache first when no entry is set yet. Whatever
+     * the wrapped cache returns is stored under the key.
+     */
+    function get($config)
+    {
+        $slot = $this->generateKey($config);
+        if (!isset($this->definitions[$slot])) {
+            $this->definitions[$slot] = parent::get($config);
+        }
+        return $this->definitions[$slot];
+    }
+
+}
+
--- /dev/null
+<?php\r
+\r
+require_once 'HTMLPurifier/DefinitionCache/Decorator.php';\r
+\r
+/**\r
+ * Definition cache decorator template: a starting point for writing a\r
+ * new decorator. Every method simply forwards to the parent decorator\r
+ * class with the same arguments.\r
+ */\r
+class HTMLPurifier_DefinitionCache_Decorator_Template extends\r
+                 HTMLPurifier_DefinitionCache_Decorator\r
+{\r
+    \r
+    var $name = 'Template'; // replace this\r
+    \r
+    function copy() {\r
+        // replace class name with yours\r
+        return new HTMLPurifier_DefinitionCache_Decorator_Template();\r
+    }\r
+    \r
+    // remove methods you don't need\r
+    \r
+    function add($def, $config) {\r
+        return parent::add($def, $config);\r
+    }\r
+    \r
+    function set($def, $config) {\r
+        return parent::set($def, $config);\r
+    }\r
+    \r
+    function replace($def, $config) {\r
+        return parent::replace($def, $config);\r
+    }\r
+    \r
+    function get($config) {\r
+        return parent::get($config);\r
+    }\r
+    \r
+    function flush($config) {\r
+        // accept and forward $config, matching the parent's\r
+        // flush($config) signature\r
+        return parent::flush($config);\r
+    }\r
+    \r
+    function cleanup($config) {\r
+        return parent::cleanup($config);\r
+    }\r
+    \r
+}\r
+\r
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache.php';
+
+/**
+ * Null cache object to use when no caching is on: each overridden
+ * operation does nothing and reports false.
+ */
+class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
+{
+
+    /** Stores nothing; always false. */
+    function add($def, $config)
+    {
+        return false;
+    }
+
+    /** Stores nothing; always false. */
+    function set($def, $config)
+    {
+        return false;
+    }
+
+    /** Replaces nothing; always false. */
+    function replace($def, $config)
+    {
+        return false;
+    }
+
+    /** Never has a definition to return; always false. */
+    function get($config)
+    {
+        return false;
+    }
+
+    /** Nothing to flush; always false. */
+    function flush($config)
+    {
+        return false;
+    }
+
+    /** Nothing to clean up; always false. */
+    function cleanup($config)
+    {
+        return false;
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache.php';
+
+// Register the %Cache.SerializerPath directive: type string/null with a
+// null default. The trailing argument is the directive's description text.
+HTMLPurifier_ConfigSchema::define(
+ 'Cache', 'SerializerPath', null, 'string/null', '
+<p>
+ Absolute path with no trailing slash to store serialized definitions in.
+ Default is within the
+ HTML Purifier library inside DefinitionCache/Serializer. This
+ path must be writable by the webserver. This directive has been
+ available since 2.0.0.
+</p>
+');
+
+/**
+ * Definition cache that stores each definition as a serialized file
+ * (<key>.ser) in a per-type directory below the base directory given by
+ * %Cache.SerializerPath, or below the library's own
+ * HTMLPurifier/DefinitionCache/Serializer directory when that directive
+ * is null.
+ */
+class HTMLPurifier_DefinitionCache_Serializer extends
+      HTMLPurifier_DefinitionCache
+{
+
+    /**
+     * Writes the definition only if no file exists for this key yet.
+     * @return Bytes written on success, false on failure, type mismatch
+     *         or when the file already exists
+     */
+    function add($def, $config) {
+        // report type mismatch as an explicit false, like other failures
+        if (!$this->checkDefType($def)) return false;
+        $file = $this->generateFilePath($config);
+        if (file_exists($file)) return false;
+        if (!$this->_prepareDir($config)) return false;
+        return $this->_write($file, serialize($def));
+    }
+
+    /**
+     * Writes the definition regardless of whether a file already exists.
+     * @return Bytes written on success, false on failure or type mismatch
+     */
+    function set($def, $config) {
+        if (!$this->checkDefType($def)) return false;
+        $file = $this->generateFilePath($config);
+        if (!$this->_prepareDir($config)) return false;
+        return $this->_write($file, serialize($def));
+    }
+
+    /**
+     * Overwrites the definition only if a file for this key exists.
+     * @return Bytes written on success, false on failure, type mismatch
+     *         or when there is nothing to replace
+     */
+    function replace($def, $config) {
+        if (!$this->checkDefType($def)) return false;
+        $file = $this->generateFilePath($config);
+        if (!file_exists($file)) return false;
+        if (!$this->_prepareDir($config)) return false;
+        return $this->_write($file, serialize($def));
+    }
+
+    /**
+     * Loads and unserializes the definition for this configuration.
+     * @return Unserialized value, or false when no file exists
+     */
+    function get($config) {
+        $file = $this->generateFilePath($config);
+        if (!file_exists($file)) return false;
+        // NOTE(review): unserialize() trusts the file contents; the cache
+        // directory must not be writable by untrusted parties
+        return unserialize(file_get_contents($file));
+    }
+
+    /**
+     * Deletes the cache file for this configuration.
+     * @return Result of unlink(), or false when no file exists
+     */
+    function remove($config) {
+        $file = $this->generateFilePath($config);
+        if (!file_exists($file)) return false;
+        return unlink($file);
+    }
+
+    /**
+     * Deletes every non-dot file in this type's cache directory.
+     * @return True when the directory was processed, false when it
+     *         could not be prepared or opened
+     */
+    function flush($config) {
+        if (!$this->_prepareDir($config)) return false;
+        $dir = $this->generateDirectoryPath($config);
+        $dh = opendir($dir);
+        // opendir() yields false on failure; readdir(false) must not run
+        if ($dh === false) return false;
+        while (false !== ($filename = readdir($dh))) {
+            if (empty($filename)) continue;
+            if ($filename[0] === '.') continue;
+            unlink($dir . '/' . $filename);
+        }
+        // release the directory handle instead of leaking it
+        closedir($dh);
+        return true;
+    }
+
+    /**
+     * Deletes every non-dot file in this type's cache directory whose
+     * key (file name minus its last four characters, i.e. ".ser") is
+     * old according to isOld().
+     * @return True when the directory was processed, false when it
+     *         could not be prepared or opened
+     */
+    function cleanup($config) {
+        if (!$this->_prepareDir($config)) return false;
+        $dir = $this->generateDirectoryPath($config);
+        $dh = opendir($dir);
+        if ($dh === false) return false;
+        while (false !== ($filename = readdir($dh))) {
+            if (empty($filename)) continue;
+            if ($filename[0] === '.') continue;
+            $key = substr($filename, 0, strlen($filename) - 4);
+            if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
+        }
+        closedir($dh);
+        return true;
+    }
+
+    /**
+     * Generates the file path to the serial file corresponding to
+     * the configuration and definition name
+     */
+    function generateFilePath($config) {
+        $key = $this->generateKey($config);
+        return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
+    }
+
+    /**
+     * Generates the path to the directory containing this cache's
+     * serial files
+     * @note No trailing slash
+     */
+    function generateDirectoryPath($config) {
+        $base = $this->generateBaseDirectoryPath($config);
+        return $base . '/' . $this->type;
+    }
+
+    /**
+     * Generates path to base directory that contains all definition type
+     * serials: %Cache.SerializerPath, or the library default when that
+     * directive is null
+     */
+    function generateBaseDirectoryPath($config) {
+        $base = $config->get('Cache', 'SerializerPath');
+        $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
+        return $base;
+    }
+
+    /**
+     * Convenience wrapper function for file_put_contents, with an
+     * fopen/fwrite fallback where that function is unavailable
+     * @param $file File name to write to
+     * @param $data Data to write into file
+     * @return Number of bytes written if success, or false if failure.
+     */
+    function _write($file, $data) {
+        static $file_put_contents;
+        if ($file_put_contents === null) {
+            $file_put_contents = function_exists('file_put_contents');
+        }
+        if ($file_put_contents) {
+            return file_put_contents($file, $data);
+        }
+        // binary mode: serialized data must be written untranslated
+        $fh = fopen($file, 'wb');
+        if (!$fh) return false;
+        $status = fwrite($fh, $data);
+        fclose($fh);
+        return $status;
+    }
+
+    /**
+     * Prepares the directory that this type stores the serials in,
+     * creating it below the base directory when it is missing
+     * @return True if successful
+     */
+    function _prepareDir($config) {
+        $directory = $this->generateDirectoryPath($config);
+        if (!is_dir($directory)) {
+            $base = $this->generateBaseDirectoryPath($config);
+            if (!is_dir($base)) {
+                // the message literal spans two source lines; kept verbatim
+                trigger_error('Base directory '.$base.' does not exist,
+ please create or change using %Cache.SerializerPath',
+                    E_USER_ERROR);
+                return false;
+            } elseif (!$this->_testPermissions($base)) {
+                return false;
+            }
+            // a failed mkdir() is only acceptable if the directory has
+            // appeared in the meantime (e.g. created by another request)
+            if (!mkdir($directory) && !is_dir($directory)) {
+                return false;
+            }
+        } elseif (!$this->_testPermissions($directory)) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Tests permissions on a directory and throws out friendly
+     * error messages and attempts to chmod it itself if possible
+     * @param $dir Directory to test
+     * @return True if the directory is writable or was chmod'ed by us
+     */
+    function _testPermissions($dir) {
+        // early abort, if it is writable, everything is hunky-dory
+        if (is_writable($dir)) return true;
+        if (!is_dir($dir)) {
+            // generally, you'll want to handle this beforehand
+            // so a more specific error message can be given
+            trigger_error('Directory '.$dir.' does not exist',
+                E_USER_ERROR);
+            return false;
+        }
+        if (function_exists('posix_getuid')) {
+            // POSIX system, we can give more specific advice
+            if (fileowner($dir) === posix_getuid()) {
+                // we can chmod it ourselves
+                chmod($dir, 0755);
+                return true;
+            } elseif (filegroup($dir) === posix_getgid()) {
+                $chmod = '775';
+            } else {
+                // PHP's probably running as nobody, so we'll
+                // need to give global permissions
+                $chmod = '777';
+            }
+            trigger_error('Directory '.$dir.' not writable, '.
+                'please chmod to ' . $chmod,
+                E_USER_ERROR);
+        } else {
+            // generic error message
+            trigger_error('Directory '.$dir.' not writable, '.
+                'please alter file permissions',
+                E_USER_ERROR);
+        }
+        return false;
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/DefinitionCache.php';
+
+// Register the %Cache.DefinitionImpl directive: type string/null with
+// 'Serializer' as its default value.
+HTMLPurifier_ConfigSchema::define(
+ 'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
+This directive defines which method to use when caching definitions,
+the complex data-type that makes HTML Purifier tick. Set to null
+to disable caching (not recommended, as you will see a definite
+performance degradation). This directive has been available since 2.0.0.
+');
+
+// 'Serializer' is the only allowed value registered for the directive.
+HTMLPurifier_ConfigSchema::defineAllowedValues(
+ 'Cache', 'DefinitionImpl', array('Serializer')
+);
+
+// Make %Core.DefinitionCache an alias that points at %Cache.DefinitionImpl.
+HTMLPurifier_ConfigSchema::defineAlias(
+ 'Core', 'DefinitionCache',
+ 'Cache', 'DefinitionImpl'
+);
+
+
+/**
+ * Responsible for creating definition caches. Created caches are kept
+ * per implementation name and definition type, and every registered
+ * decorator is wrapped around a newly created cache.
+ */
+class HTMLPurifier_DefinitionCacheFactory
+{
+
+    /**
+     * Already created caches, indexed by implementation then type.
+     */
+    var $caches = array('Serializer' => array());
+
+    /**
+     * Registered decorators, indexed by their name.
+     */
+    var $decorators = array();
+
+    /**
+     * Initialize default decorators
+     */
+    function setup() {
+        $this->addDecorator('Cleanup');
+    }
+
+    /**
+     * Retrieves an instance of global definition cache factory.
+     * @static
+     * @param $prototype Object to install as the global instance; true
+     *        to discard the current instance and build a fresh default
+     *        factory; null (default) to just fetch the instance,
+     *        creating it on first use
+     */
+    function &instance($prototype = null) {
+        static $instance;
+        // The reset request must be tested before the generic prototype
+        // branch, otherwise the literal true would itself be stored as
+        // the "instance".
+        if ($prototype === true || ($prototype === null && $instance === null)) {
+            $instance = new HTMLPurifier_DefinitionCacheFactory();
+            $instance->setup();
+        } elseif ($prototype !== null) {
+            $instance = $prototype;
+        }
+        return $instance;
+    }
+
+    /**
+     * Factory method that creates a cache object based on configuration
+     * @param $type Name of definitions handled by cache
+     * @param $config Instance of HTMLPurifier_Config
+     * @return Null cache when %Cache.DefinitionImpl is null, otherwise a
+     *         (possibly decorated) Serializer cache, reused per type
+     */
+    function &create($type, $config) {
+        // only one implementation as for right now, $config will
+        // be used to determine implementation
+        $method = $config->get('Cache', 'DefinitionImpl');
+        if ($method === null) {
+            $null = new HTMLPurifier_DefinitionCache_Null($type);
+            return $null;
+        }
+        // reuse a cache that was already built for this method and type
+        if (!empty($this->caches[$method][$type])) {
+            return $this->caches[$method][$type];
+        }
+        $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
+        foreach ($this->decorators as $decorator) {
+            $new_cache = $decorator->decorate($cache);
+            // prevent infinite recursion in PHP 4
+            unset($cache);
+            $cache = $new_cache;
+        }
+        $this->caches[$method][$type] = $cache;
+        return $this->caches[$method][$type];
+    }
+
+    /**
+     * Registers a decorator to add to all new cache objects
+     * @param $decorator Decorator object, or the name suffix of a
+     *        HTMLPurifier_DefinitionCache_Decorator_* class to
+     *        instantiate
+     */
+    function addDecorator($decorator) {
+        if (is_string($decorator)) {
+            $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
+            $decorator = new $class;
+        }
+        $this->decorators[$decorator->name] = $decorator;
+    }
+
+}
+
--- /dev/null
+<?php
+
+/**
+ * Value object describing a document type, including which modules
+ * need to be loaded for it.
+ * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
+ *       If structure changes, please update that function.
+ */
+class HTMLPurifier_Doctype
+{
+    /** Full name of doctype */
+    var $name;
+
+    /**
+     * Standard modules this doctype uses, given as string identifiers
+     * or literal objects
+     */
+    var $modules = array();
+
+    /** Modules to use for tidying up code */
+    var $tidyModules = array();
+
+    /** Is the language derived from XML (i.e. XHTML)? */
+    var $xml = true;
+
+    /** Alias names for this doctype */
+    var $aliases = array();
+
+    /** Public DTD identifier */
+    var $dtdPublic;
+
+    /** System DTD identifier */
+    var $dtdSystem;
+
+    /**
+     * Stores each constructor argument in the matching member variable.
+     */
+    function HTMLPurifier_Doctype($name = null, $xml = true, $modules = array(),
+        $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
+    ) {
+        // member name => value passed in
+        $fields = array(
+            'name'        => $name,
+            'xml'         => $xml,
+            'modules'     => $modules,
+            'tidyModules' => $tidyModules,
+            'aliases'     => $aliases,
+            'dtdPublic'   => $dtd_public,
+            'dtdSystem'   => $dtd_system
+        );
+        foreach ($fields as $member => $value) {
+            $this->$member = $value;
+        }
+    }
+
+    /**
+     * Clones the doctype through a serialize/unserialize round trip;
+     * use before resolving modes and the like
+     */
+    function copy() {
+        $frozen = serialize($this);
+        return unserialize($frozen);
+    }
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Doctype.php';
+
+// Legacy boolean directives for doctype specification; both description
+// strings mark them as deprecated in favor of %HTML.Doctype.
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Strict', false, 'bool',
+ 'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
+ 'This directive is deprecated in favor of %HTML.Doctype. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'XHTML', true, 'bool',
+ 'Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. '.
+ 'This directive is deprecated in favor of %HTML.Doctype. '.
+ 'This directive was available since 1.1.'
+);
+// Make %Core.XHTML an alias that points at %HTML.XHTML.
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'XHTML', 'HTML', 'XHTML');
+
+/**
+ * Registry of doctype objects, addressable by name or by alias, that
+ * can also pick the doctype requested by a configuration object.
+ */
+class HTMLPurifier_DoctypeRegistry
+{
+
+    /**
+     * Hash of doctype names to doctype objects
+     * @protected
+     */
+    var $doctypes;
+
+    /**
+     * Lookup table of aliases to real doctype names
+     * @protected
+     */
+    var $aliases;
+
+    /**
+     * Registers a doctype to the registry
+     * @note Accepts a fully-formed doctype object, or the
+     *       parameters for constructing a doctype object
+     * @param $doctype Name of doctype or literal doctype object
+     * @param $xml Whether the doctype is XML based
+     * @param $modules Modules doctype will load
+     * @param $tidy_modules Modules doctype will use for tidying
+     * @param $aliases Alias names for doctype
+     * @param $dtd_public Public DTD identifier
+     * @param $dtd_system System DTD identifier
+     * @return Reference to registered doctype (usable for further editing)
+     */
+    function &register($doctype, $xml = true, $modules = array(),
+        $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
+    ) {
+        // scalars are accepted as shorthand for one-element lists
+        if (!is_array($modules)) $modules = array($modules);
+        if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules);
+        if (!is_array($aliases)) $aliases = array($aliases);
+        if (!is_object($doctype)) {
+            $doctype = new HTMLPurifier_Doctype(
+                $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
+            );
+        }
+        $this->doctypes[$doctype->name] =& $doctype;
+        $name = $doctype->name;
+        // hookup aliases
+        foreach ($doctype->aliases as $alias) {
+            // a real doctype of that name takes precedence over the alias
+            if (isset($this->doctypes[$alias])) continue;
+            $this->aliases[$alias] = $name;
+        }
+        // remove old aliases
+        if (isset($this->aliases[$name])) unset($this->aliases[$name]);
+        return $doctype;
+    }
+
+    /**
+     * Retrieves reference to a doctype of a certain name
+     * @note This function resolves aliases
+     * @note When possible, use the more fully-featured make()
+     * @param $doctype Name of doctype
+     * @return Reference to doctype object; for an unknown name an error
+     *         is triggered and a new doctype carrying only that name is
+     *         returned
+     */
+    function &get($doctype) {
+        if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
+        if (!isset($this->doctypes[$doctype])) {
+            trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
+            $anon = new HTMLPurifier_Doctype($doctype);
+            return $anon;
+        }
+        return $this->doctypes[$doctype];
+    }
+
+    /**
+     * Creates a doctype based on a configuration object,
+     * will perform initialization on the doctype
+     * @note Use this function to get a copy of doctype that config
+     *       can hold on to (this is necessary in order to tell
+     *       Generator whether or not the current document is XML
+     *       based or not).
+     * @param $config Instance of HTMLPurifier_Config
+     * @return Copy of the doctype selected by the configuration
+     */
+    function make($config) {
+        $original_doctype = $this->get($this->getDoctypeFromConfig($config));
+        $doctype = $original_doctype->copy();
+        return $doctype;
+    }
+
+    /**
+     * Retrieves the doctype name from the configuration object:
+     * %HTML.Doctype first, then %HTML.CustomDoctype, and finally a name
+     * assembled from the legacy %HTML.XHTML and %HTML.Strict directives
+     * @param $config Instance of HTMLPurifier_Config
+     * @return String doctype name
+     */
+    function getDoctypeFromConfig($config) {
+        // recommended test
+        $doctype = $config->get('HTML', 'Doctype');
+        if (!empty($doctype)) return $doctype;
+        $doctype = $config->get('HTML', 'CustomDoctype');
+        if (!empty($doctype)) return $doctype;
+        // backwards-compatibility
+        if ($config->get('HTML', 'XHTML')) {
+            $doctype = 'XHTML 1.0';
+        } else {
+            $doctype = 'HTML 4.01';
+        }
+        if ($config->get('HTML', 'Strict')) {
+            $doctype .= ' Strict';
+        } else {
+            $doctype .= ' Transitional';
+        }
+        return $doctype;
+    }
+
+}
+
/**
* Structure that stores an HTML element definition. Used by
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
+ * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
+ * Please update that class too.
*/
class HTMLPurifier_ElementDef
{
* Abstract string representation of internal ChildDef rules. See
* HTMLPurifier_ContentSets for how this is parsed and then transformed
* into an HTMLPurifier_ChildDef.
+ * @warning This is a temporary variable that is not available after
+ * being processed by HTMLDefinition
* @public
*/
var $content_model;
/**
* Value of $child->type, used to determine which ChildDef to use,
* used in combination with $content_model.
+ * @warning This must be lowercase
+ * @warning This is a temporary variable that is not available after
+ * being processed by HTMLDefinition
* @public
*/
var $content_model_type;
- /**
- * Lookup table of tags that close this tag. Used during parsing
- * to make sure we don't attempt to nest unclosed tags.
- * @public
- */
- var $auto_close = array();
-
/**
* Does the element have a content model (#PCDATA | Inline)*? This
* is important for chameleon ins and del processing in
* have to worry about this one.
* @public
*/
- var $descendants_are_inline;
+ var $descendants_are_inline = false;
+
+ /**
+ * List of the names of required attributes this element has. Dynamically
+ * populated.
+ * @public
+ */
+ var $required_attr = array();
/**
* Lookup table of tags excluded from all descendants of this tag.
+ * @note SGML permits exclusions for all descendants, but this is
+ * not possible with DTDs or XML Schemas. W3C has elected to
+ * use complicated compositions of content_models to simulate
+ * exclusion for children, but we go the simpler, SGML-style
+ * route of flat-out exclusions, which correctly apply to
+ * all descendants and not just children. Note that the XHTML
+ * Modularization Abstract Modules are blithely unaware of such
+ * distinctions.
* @public
*/
var $excludes = array();
+ /**
+ * Is this element safe for untrusted users to use?
+ */
+ var $safe;
+
+ /**
+  * Low-level factory that builds a new standalone element definition
+  * from its basic parts.
+  * @static
+  * @param $safe Is the element safe for untrusted users? Cast to bool.
+  * @param $content_model Stored as the definition's content_model
+  * @param $content_model_type Stored as the definition's content_model_type
+  * @param $attr Stored as the definition's attr
+  * @return Newly created HTMLPurifier_ElementDef
+  */
+ function create($safe, $content_model, $content_model_type, $attr) {
+     $element_def = new HTMLPurifier_ElementDef();
+     $element_def->safe               = (bool) $safe;
+     $element_def->content_model      = $content_model;
+     $element_def->content_model_type = $content_model_type;
+     $element_def->attr               = $attr;
+     return $element_def;
+ }
+
/**
* Merges the values of another element definition into this one.
* Values from the new element def take precedence if a value is
// merge in the includes
// sorry, no way to override an include
foreach ($v as $v2) {
- $def->attr[0][] = $v2;
+ $this->attr[0][] = $v2;
}
continue;
}
+ if ($v === false) {
+ if (isset($this->attr[$k])) unset($this->attr[$k]);
+ continue;
+ }
$this->attr[$k] = $v;
}
- foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
- foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
- foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
- foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
+ $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
+ $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
+ $this->_mergeAssocArray($this->excludes, $def->excludes);
+ if(!empty($def->content_model)) {
+ $this->content_model .= ' | ' . $def->content_model;
+ $this->child = false;
+ }
+ if(!empty($def->content_model_type)) {
+ $this->content_model_type = $def->content_model_type;
+ $this->child = false;
+ }
if(!is_null($def->child)) $this->child = $def->child;
- if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
- if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
- if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
+ if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
+ if(!is_null($def->safe)) $this->safe = $def->safe;
}
+ /**
+  * Folds the entries of one associative array into another. An entry
+  * whose value is exactly false is not copied across; instead it asks
+  * for that key to be dropped from the target (if it is set there).
+  * @param $a1 Array, passed by reference, that receives the entries
+  * @param $a2 Array whose entries are applied to $a1
+  */
+ function _mergeAssocArray(&$a1, $a2) {
+     foreach ($a2 as $key => $value) {
+         if ($value !== false) {
+             $a1[$key] = $value;
+         } elseif (isset($a1[$key])) {
+             unset($a1[$key]);
+         }
+     }
+ }
+
+ /**
+  * Produces a duplicate of this element definition by serializing it
+  * and unserializing the result.
+  * @return Copy of this element definition
+  */
+ function copy() {
+     $frozen = serialize($this);
+     return unserialize($frozen);
+ }
+
}
-?>
+
<?php
-require_once 'HTMLPurifier/EntityLookup.php';
-
HTMLPurifier_ConfigSchema::define(
'Core', 'Encoding', 'utf-8', 'istring',
'If for some reason you are unable to convert all webpages to UTF-8, '.
}
-?>
\ No newline at end of file
*/
function setup($file = false) {
if (!$file) {
- $file = dirname(__FILE__) . '/EntityLookup/entities.ser';
+ $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
}
$this->table = unserialize(file_get_contents($file));
}
}
-?>
\ No newline at end of file
* @protected
*/
var $_substituteEntitiesRegex =
-'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z]+));?/';
-// 1. hex 2. dec 3. string
+'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
+// 1. hex 2. dec 3. string (XML style)
/**
} else {
if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
if (!$this->_entity_lookup) {
- require_once 'HTMLPurifier/EntityLookup.php';
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
}
if (isset($this->_entity_lookup->table[$matches[3]])) {
}
-?>
\ No newline at end of file
*/
class HTMLPurifier_Error {}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Generator.php';
+
+/**
+ * Error collection class that enables HTML Purifier to report HTML
+ * problems back to the user. Errors are accumulated through send() and
+ * read back with getRaw() or getHTMLFormatted().
+ */
+class HTMLPurifier_ErrorCollector
+{
+
+ // collected errors; each entry is array(line, severity, message)
+ var $errors = array();
+ // 'Locale' entry of the context; used to look up and format messages
+ var $locale;
+ // 'Generator' entry of the context; used to escape messages for HTML
+ var $generator;
+ // context handed to the constructor; queried again on every send()
+ var $context;
+
+ /**
+ * Binds the collector to a context and stores references to its
+ * 'Locale' and 'Generator' entries.
+ * @param $context Context object providing 'Locale' and 'Generator'
+ */
+ function HTMLPurifier_ErrorCollector(&$context) {
+ $this->locale =& $context->get('Locale');
+ $this->generator =& $context->get('Generator');
+ $this->context =& $context;
+ }
+
+ /**
+ * Sends an error message to the collector for later use. The line
+ * number is not a parameter: it is taken from the context's
+ * 'CurrentToken' entry or, when there is none, from 'CurrentLine'.
+ * @param $severity int Error severity, PHP error style (don't use E_USER_)
+ * @param $msg string Error message text
+ * @param ... Optional extra arguments; passed on to the locale's
+ * formatMessage() keyed from 1 upwards
+ */
+ function send($severity, $msg) {
+
+ // gather extra arguments: after dropping $severity (shift) and $msg
+ // (unset of key 0) the remaining values are keyed starting at 1
+ $args = array();
+ if (func_num_args() > 2) {
+ $args = func_get_args();
+ array_shift($args);
+ unset($args[0]);
+ }
+
+ // NOTE(review): the second argument true presumably suppresses the
+ // error for a missing context entry - confirm in HTMLPurifier_Context
+ $token = $this->context->get('CurrentToken', true);
+ $line = $token ? $token->line : $this->context->get('CurrentLine', true);
+ $attr = $this->context->get('CurrentAttr', true);
+
+ // perform special substitutions, also add custom parameters
+ $subst = array();
+ if (!is_null($token)) {
+ $args['CurrentToken'] = $token;
+ }
+ if (!is_null($attr)) {
+ $subst['$CurrentAttr.Name'] = $attr;
+ if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
+ }
+
+ // plain lookup when there is nothing to interpolate, otherwise format
+ if (empty($args)) {
+ $msg = $this->locale->getMessage($msg);
+ } else {
+ $msg = $this->locale->formatMessage($msg, $args);
+ }
+
+ if (!empty($subst)) $msg = strtr($msg, $subst);
+
+ $this->errors[] = array($line, $severity, $msg);
+ }
+
+ /**
+ * Retrieves raw error data for custom formatter to use
+ * @return List of arrays in format of array(line number, error
+ * severity, error message text)
+ */
+ function getRaw() {
+ return $this->errors;
+ }
+
+ /**
+ * Default HTML formatting implementation for error messages
+ * @param $config Configuration object, vital for HTML output nature
+ * @return HTML string: a paragraph when no errors were collected,
+ * otherwise an unordered list with one item per error
+ */
+ function getHTMLFormatted($config) {
+ $ret = array();
+
+ $errors = $this->errors;
+
+ // sort error array by line
+ // line numbers are enabled if they aren't explicitly disabled
+ if ($config->get('Core', 'MaintainLineNumbers') !== false) {
+ // sort keys: errors that have a line first, then ascending line,
+ // then the order in which the errors were sent
+ $has_line = array();
+ $lines = array();
+ $original_order = array();
+ foreach ($errors as $i => $error) {
+ $has_line[] = (int) (bool) $error[0];
+ $lines[] = $error[0];
+ $original_order[] = $i;
+ }
+ array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors);
+ }
+
+ foreach ($errors as $error) {
+ list($line, $severity, $msg) = $error;
+ $string = '';
+ $string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: ';
+ $string .= $this->generator->escape($msg);
+ if ($line) {
+ // have javascript link generation that causes
+ // textarea to skip to the specified line
+ $string .= $this->locale->formatMessage(
+ 'ErrorCollector: At line', array('line' => $line));
+ }
+ $ret[] = $string;
+ }
+
+ if (empty($errors)) {
+ return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
+ } else {
+ return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
+ }
+
+ }
+
+}
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
<?php
-require_once 'HTMLPurifier/Lexer.php';
-
HTMLPurifier_ConfigSchema::define(
- 'Core', 'CleanUTF8DuringGeneration', false, 'bool',
- 'When true, HTMLPurifier_Generator will also check all strings it '.
- 'escapes for UTF-8 well-formedness as a defense in depth measure. '.
- 'This could cause a considerable performance impact, and is not '.
- 'strictly necessary due to the fact that the Lexers should have '.
- 'ensured that all the UTF-8 strings were well-formed. Note that '.
- 'the configuration value is only read at the beginning of '.
- 'generateFromTokens.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Core', 'XHTML', true, 'bool',
- 'Determines whether or not output is XHTML or not. When disabled, HTML '.
- 'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
- 'such as boolean attribute expansion and trailing slashes in empty tags. '.
- 'This directive was available since 1.1.'
+ 'Output', 'CommentScriptContents', true, 'bool',
+ 'Determines whether or not HTML Purifier should attempt to fix up '.
+ 'the contents of script tags for legacy browsers with comments. This '.
+ 'directive was available since 2.0.0.'
);
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
// extension constraints could be factored into ConfigSchema
HTMLPurifier_ConfigSchema::define(
- 'Core', 'TidyFormat', false, 'bool',
- '<p>Determines whether or not to run Tidy on the final output for pretty '.
- 'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
- 'improve readability for editors who are hand-editing the HTML, but is '.
- 'by no means necessary as HTML Purifier has already fixed all major '.
- 'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
- 'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
- 'the overall look of your page\'s source better, I recommend running Tidy '.
- 'on the entire page rather than just user-content (after all, the '.
- 'indentation relative to the containing blocks will be incorrect).</p><p>This '.
- 'directive was available since 1.1.1.</p>'
+ 'Output', 'TidyFormat', false, 'bool', <<<HTML
+<p>
+ Determines whether or not to run Tidy on the final output for pretty
+ formatting reasons, such as indentation and wrap.
+</p>
+<p>
+ This can greatly improve readability for editors who are hand-editing
+ the HTML, but is by no means necessary as HTML Purifier has already
+ fixed all major errors the HTML may have had. Tidy is a non-default
+ extension, and this directive will silently fail if Tidy is not
+ available.
+</p>
+<p>
+ If you are looking to make the overall look of your page's source
+ better, I recommend running Tidy on the entire page rather than just
+ user-content (after all, the indentation relative to the containing
+ blocks will be incorrect).
+</p>
+<p>
+ This directive was available since 1.1.1.
+</p>
+HTML
);
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
+
+HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
+<p>
+ Newline string to format final output with. If left null, HTML Purifier
+ will auto-detect the default newline type of the system and use that;
+ you can manually override it here. Remember, \r\n is Windows, \r
+ is Mac, and \n is Unix. This directive was available since 2.0.1.
+</p>
+');
/**
* Generates HTML from tokens.
+ * @todo Refactor interface so that configuration/context is determined
+ * upon instantiation, no need for messy generateFromTokens() calls
*/
class HTMLPurifier_Generator
{
/**
- * Bool cache of %Core.CleanUTF8DuringGeneration
+ * Bool cache of %HTML.XHTML
* @private
*/
- var $_clean_utf8 = false;
+ var $_xhtml = true;
/**
- * Bool cache of %Core.XHTML
+ * Bool cache of %Output.CommentScriptContents
* @private
*/
- var $_xhtml = true;
+ var $_scriptFix = false;
+
+ /**
+ * Cache of HTMLDefinition
+ * @private
+ */
+ var $_def;
/**
* Generates HTML from an array of tokens.
function generateFromTokens($tokens, $config, &$context) {
$html = '';
if (!$config) $config = HTMLPurifier_Config::createDefault();
- $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
- $this->_xhtml = $config->get('Core', 'XHTML');
+ $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
+
+ $this->_def = $config->getHTMLDefinition();
+ $this->_xhtml = $this->_def->doctype->xml;
+
if (!$tokens) return '';
- foreach ($tokens as $token) {
- $html .= $this->generateFromToken($token);
+ for ($i = 0, $size = count($tokens); $i < $size; $i++) {
+ if ($this->_scriptFix && $tokens[$i]->name === 'script'
+ && $i + 2 < $size && $tokens[$i+2]->type == 'end') {
+ // script special case
+ // the contents of the script block must be ONE token
+ // for this to work
+ $html .= $this->generateFromToken($tokens[$i++]);
+ $html .= $this->generateScriptFromToken($tokens[$i++]);
+ // We're not going to do this: it wouldn't be valid anyway
+ //while ($tokens[$i]->name != 'script') {
+ // $html .= $this->generateScriptFromToken($tokens[$i++]);
+ //}
+ }
+ $html .= $this->generateFromToken($tokens[$i]);
}
- if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
+ if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
$tidy_options = array(
'indent'=> true,
$html = (string) $tidy;
}
}
+ // normalize newlines to system
+ $nl = $config->get('Output', 'Newline');
+ if ($nl === null) $nl = PHP_EOL;
+ $html = str_replace("\n", $nl, $html);
return $html;
}
function generateFromToken($token) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
- $attr = $this->generateAttributes($token->attr);
+ $attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
- $attr = $this->generateAttributes($token->attr);
+ $attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
. '>';
}
}
+ /**
+  * Special case processor for the contents of script tags. A text token
+  * has a trailing // removed, is trimmed, and is then wrapped in a
+  * comment/CDATA guard; any other token is generated the normal way.
+  * @warning This runs into problems if there's already a literal
+  *          --> somewhere inside the script contents.
+  * @param $token Token found between the script start and end tags
+  * @return Generated HTML string
+  */
+ function generateScriptFromToken($token) {
+     if ($token->type == 'text') {
+         $script = preg_replace('#//\s*$#', '', $token->data);
+         $script = trim($script);
+         // guard technique: thanks <http://lachy.id.au/log/2005/05/script-comments>
+         return '<!--//--><![CDATA[//><!--' . "\n" . $script . "\n" . '//--><!]]>';
+     }
+     return $this->generateFromToken($token);
+ }
+
/**
* Generates attribute declarations from attribute array.
* @param $assoc_array_of_attributes Attribute array
* @return Generate HTML fragment for insertion.
*/
- function generateAttributes($assoc_array_of_attributes) {
+ function generateAttributes($assoc_array_of_attributes, $element) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
if (!$this->_xhtml) {
// remove namespaced attributes
if (strpos($key, ':') !== false) continue;
- // also needed: check for attribute minimization
+ if (!empty($this->_def->info[$element]->attr[$key]->minimized)) {
+ $html .= $key . ' ';
+ continue;
+ }
}
$html .= $key.'="'.$this->escape($value).'" ';
}
* @return String escaped data.
*/
function escape($string) {
- if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string);
return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
}
}
-?>
\ No newline at end of file
<?php
-// components
+require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/HTMLModuleManager.php';
// this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces
-// will be superceded by more accurate doctype declaration schemes
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Strict', false, 'bool',
- 'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
- 'This directive has been available since 1.3.0.'
-);
+ 'HTML', 'DefinitionID', null, 'string/null', '
+<p>
+ Unique identifier for a custom-built HTML definition. If you edit
+ the raw version of the HTMLDefinition, introducing changes that the
+ configuration object does not reflect, you must specify this variable.
+ If you change your custom edits, you should change this directive, or
+ clear your cache. Example:
+</p>
+<pre>
+$config = HTMLPurifier_Config::createDefault();
+$config->set(\'HTML\', \'DefinitionID\', \'1\');
+$def = $config->getHTMLDefinition();
+$def->addAttribute(\'a\', \'tabindex\', \'Number\');
+</pre>
+<p>
+ In the above example, the configuration is still at the defaults, but
+ using the advanced API, an extra attribute has been added. The
+ configuration object normally has no way of knowing that this change
+ has taken place, so it needs an extra directive: %HTML.DefinitionID.
+ If someone else attempts to use the default configuration, these two
+ pieces of code will not clobber each other in the cache, since one has
+ an extra directive attached to it.
+</p>
+<p>
+ This directive has been available since 2.0.0, and in that version or
+ later you <em>must</em> specify a value to this directive to use the
+ advanced API features.
+</p>
+');
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'BlockWrapper', 'p', 'string',
- 'String name of element to wrap inline elements that are inside a block '.
- 'context. This only occurs in the children of blockquote in strict mode. '.
- 'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
- 'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
- '<code><p></code> tags can be replaced '.
- 'with whatever you desire, as long as it is a block level element. '.
- 'This directive has been available since 1.3.0.'
-);
+ 'HTML', 'DefinitionRev', 1, 'int', '
+<p>
+ Revision identifier for your custom definition specified in
+ %HTML.DefinitionID. This serves the same purpose: uniquely identifying
+ your custom definition, but this one does so in a chronological
+ context: revision 3 is more up-to-date than revision 2. Thus, when
+ this gets incremented, the cache handling is smart enough to clean
+ up any older revisions of your definition as well as flush the
+ cache. This directive has been available since 2.0.0.
+</p>
+');
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Parent', 'div', 'string',
- 'String name of element that HTML fragment passed to library will be '.
- 'inserted in. An interesting variation would be using span as the '.
- 'parent element, meaning that only inline tags would be allowed. '.
- 'This directive has been available since 1.3.0.'
-);
+ 'HTML', 'BlockWrapper', 'p', 'string', '
+<p>
+ String name of element to wrap inline elements that are inside a block
+ context. This only occurs in the children of blockquote in strict mode.
+</p>
+<p>
+ Example: by default value,
+ <code><blockquote>Foo</blockquote></code> would become
+ <code><blockquote><p>Foo</p></blockquote></code>.
+ The <code><p></code> tags can be replaced with whatever you desire,
+ as long as it is a block level element. This directive has been available
+ since 1.3.0.
+</p>
+');
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedElements', null, 'lookup/null',
- 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
- 'can overload it with your own list of tags to allow. Note that this '.
- 'method is subtractive: it does its job by taking away from HTML Purifier '.
- 'usual feature set, so you cannot add a tag that HTML Purifier never '.
- 'supported in the first place (like embed, form or head). If you change this, you '.
- 'probably also want to change %HTML.AllowedAttributes. '.
- '<strong>Warning:</strong> If another directive conflicts with the '.
- 'elements here, <em>that</em> directive will win and override. '.
- 'This directive has been available since 1.3.0.'
-);
+ 'HTML', 'Parent', 'div', 'string', '
+<p>
+ String name of element that HTML fragment passed to library will be
+ inserted in. An interesting variation would be using span as the
+ parent element, meaning that only inline tags would be allowed.
+ This directive has been available since 1.3.0.
+</p>
+');
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedAttributes', null, 'lookup/null',
- 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
- 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
- '(style, id, class, dir, lang, xml:lang).'.
- '<strong>Warning:</strong> If another directive conflicts with the '.
- 'elements here, <em>that</em> directive will win and override. For '.
- 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
- 'directive. You must set that directive to true before you can use '.
- 'IDs at all. This directive has been available since 1.3.0.'
-);
+ 'HTML', 'AllowedElements', null, 'lookup/null', '
+<p>
+ If HTML Purifier\'s tag set is unsatisfactory for your needs, you
+ can overload it with your own list of tags to allow. Note that this
+ method is subtractive: it does its job by taking away from HTML Purifier
+ usual feature set, so you cannot add a tag that HTML Purifier never
+ supported in the first place (like embed, form or head). If you
+ change this, you probably also want to change %HTML.AllowedAttributes.
+</p>
+<p>
+ <strong>Warning:</strong> If another directive conflicts with the
+ elements here, <em>that</em> directive will win and override.
+ This directive has been available since 1.3.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedAttributes', null, 'lookup/null', '
+<p>
+ If HTML Purifier\'s attribute set is unsatisfactory, overload it!
+ The syntax is "tag.attr" or "*.attr" for the global attributes
+ (style, id, class, dir, lang, xml:lang).
+</p>
+<p>
+ <strong>Warning:</strong> If another directive conflicts with the
+ elements here, <em>that</em> directive will win and override. For
+ example, %HTML.EnableAttrID will take precedence over *.id in this
+ directive. You must set that directive to true before you can use
+ IDs at all. This directive has been available since 1.3.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Allowed', null, 'itext/null', '
+<p>
+ This is a convenience directive that rolls the functionality of
+ %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
+ Specify elements and attributes that are allowed using:
+ <code>element1[attr1|attr2],element2...</code>. You can also use
+ newlines instead of commas to separate elements.
+</p>
+<p>
+ <strong>Warning</strong>:
+ All of the constraints on the component directives are still enforced.
+ The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
+ whitelist: directly copy-pasting it here will probably result in
+ broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
+ are set, this directive has no effect.
+ This directive has been available since 2.0.0.
+</p>
+');
/**
* Definition of the purified HTML that describes allowed children,
* Purifier internals. Many of them, however, are public, and may be
* edited by userspace code to tweak the behavior of HTMLDefinition.
*
- * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
- * rule: in the interest of comprehensiveness, it will sniff everything.
+ * @note This class is inspected by Printer_HTMLDefinition; please
+ * update that class if things here change.
*/
-class HTMLPurifier_HTMLDefinition
+class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{
- /** FULLY-PUBLIC VARIABLES */
+ // FULLY-PUBLIC VARIABLES ---------------------------------------------
/**
* Associative array of element names to HTMLPurifier_ElementDef
*/
var $info_content_sets = array();
+ /**
+ * Doctype object
+ */
+ var $doctype;
+
+
+
+ // RAW CUSTOMIZATION STUFF --------------------------------------------
+
+ /**
+ * Adds a custom attribute to a pre-existing element. The attribute is
+ * recorded on a blank element definition inside the anonymous module.
+ * @param $element_name String element name to add attribute to
+ * @param $attr_name String name of attribute
+ * @param $def Attribute definition, can be string or object, see
+ * HTMLPurifier_AttrTypes for details
+ */
+ function addAttribute($element_name, $attr_name, $def) {
+ // both assignments are by reference so that the write below lands on
+ // the element stored in the module, not on a copy
+ $module =& $this->getAnonymousModule();
+ $element =& $module->addBlankElement($element_name);
+ $element->attr[$attr_name] = $def;
+ }
+
+ /**
+  * Adds a custom element to your HTML definition by registering it on
+  * the anonymous module. The element is always registered as safe.
+  * @note See HTMLPurifier_HTMLModule::addElement for detailed
+  *       parameter descriptions.
+  * @param $element_name Name of element to add
+  * @param $type Content set the element should be registered to
+  * @param $contents Allowed children of the element
+  * @param $attr_collections Attribute collections to register to the
+  *        element; optional, defaults to none as in
+  *        HTMLPurifier_HTMLModule::addElement
+  * @param $attributes Unique attributes the element defines; optional,
+  *        defaults to none as in HTMLPurifier_HTMLModule::addElement
+  */
+ function addElement($element_name, $type, $contents, $attr_collections = array(), $attributes = array()) {
+     $module =& $this->getAnonymousModule();
+     // assume that if the user is calling this, the element
+     // is safe. This may not be a good idea
+     $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+ }
+
+ /**
+ * Retrieves a reference to the anonymous module, so you can
+ * bust out advanced features without having to make your own
+ * module. The module is created on first use and named 'Anonymous'.
+ * @return Reference to the HTMLPurifier_HTMLModule held in $_anonModule
+ */
+ function &getAnonymousModule() {
+ // create the module lazily, the first time it is asked for
+ if (!$this->_anonModule) {
+ $this->_anonModule = new HTMLPurifier_HTMLModule();
+ $this->_anonModule->name = 'Anonymous';
+ }
+ return $this->_anonModule;
+ }
+ // anonymous module instance; stays unset until getAnonymousModule() runs
+ var $_anonModule;
- /** PUBLIC BUT INTERNAL VARIABLES */
- var $setup = false; /**< Has setup() been called yet? */
- var $config; /**< Temporary instance of HTMLPurifier_Config */
+ // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
+ var $type = 'HTML';
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
* Performs low-cost, preliminary initialization.
- * @param $config Instance of HTMLPurifier_Config
*/
- function HTMLPurifier_HTMLDefinition(&$config) {
- $this->config =& $config;
+ function HTMLPurifier_HTMLDefinition() {
$this->manager = new HTMLPurifier_HTMLModuleManager();
}
- /**
- * Processes internals into form usable by HTMLPurifier internals.
- * Modifying the definition after calling this function should not
- * be done.
- */
- function setup() {
-
- // multiple call guard
- if ($this->setup) {return;} else {$this->setup = true;}
-
- $this->processModules();
- $this->setupConfigStuff();
-
- unset($this->config);
+ function doSetup($config) {
+ $this->processModules($config);
+ $this->setupConfigStuff($config);
unset($this->manager);
+ // cleanup some of the element definitions
+ foreach ($this->info as $k => $v) {
+ unset($this->info[$k]->content_model);
+ unset($this->info[$k]->content_model_type);
+ }
}
/**
* Extract out the information from the manager
*/
- function processModules() {
+ function processModules($config) {
+
+ if ($this->_anonModule) {
+ // for user specific changes
+ // this is late-loaded so we don't have to deal with PHP4
+ // reference wonky-ness
+ $this->manager->addModule($this->_anonModule);
+ unset($this->_anonModule);
+ }
- $this->manager->setup($this->config);
+ $this->manager->setup($config);
+ $this->doctype = $this->manager->doctype;
- foreach ($this->manager->activeModules as $module) {
+ foreach ($this->manager->modules as $module) {
foreach($module->info_tag_transform as $k => $v) {
if ($v === false) unset($this->info_tag_transform[$k]);
else $this->info_tag_transform[$k] = $v;
}
}
- $this->info = $this->manager->getElements($this->config);
+ $this->info = $this->manager->getElements();
$this->info_content_sets = $this->manager->contentSets->lookup;
}
/**
* Sets up stuff based on config. We need a better way of doing this.
*/
- function setupConfigStuff() {
+ function setupConfigStuff($config) {
- $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
+ $block_wrapper = $config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper;
} else {
E_USER_ERROR);
}
- $parent = $this->config->get('HTML', 'Parent');
- $def = $this->manager->getElement($parent, $this->config);
+ $parent = $config->get('HTML', 'Parent');
+ $def = $this->manager->getElement($parent, true);
if ($def) {
$this->info_parent = $parent;
$this->info_parent_def = $def;
} else {
trigger_error('Cannot use unrecognized element as parent.',
E_USER_ERROR);
- $this->info_parent_def = $this->manager->getElement(
- $this->info_parent, $this->config);
+ $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
}
// support template text
$support = "(for information on implementing this, see the ".
"support forums) ";
- // setup allowed elements, SubtractiveWhitelist module
- $allowed_elements = $this->config->get('HTML', 'AllowedElements');
+ // setup allowed elements
+
+ $allowed_elements = $config->get('HTML', 'AllowedElements');
+ $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+
+ if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
+ $allowed = $config->get('HTML', 'Allowed');
+ if (is_string($allowed)) {
+ list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
+ }
+ }
+
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
// emit errors
foreach ($allowed_elements as $element => $d) {
+ $element = htmlspecialchars($element);
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
}
}
- $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
// emit errors
foreach ($allowed_attributes_mutable as $elattr => $d) {
list($element, $attribute) = explode('.', $elattr);
+ $element = htmlspecialchars($element);
+ $attribute = htmlspecialchars($attribute);
if ($element == '*') {
trigger_error("Global attribute '$attribute' is not ".
"supported in any elements $support",
}
+ /**
+  * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
+  * separate lists for processing. Format is element[attr1|attr2],element2...
+  * Entries are separated by commas or newlines; spaces and tabs anywhere
+  * in the list are ignored.
+  * @warning Although it's largely drawn from TinyMCE's implementation,
+  *          it is different, and you'll probably have to modify your lists
+  * @param $list String list to parse
+  * @return array($allowed_elements, $allowed_attributes); both are lookup
+  *         arrays, keyed by element name and by "element.attr"
+  *         respectively, with every value set to true
+  */
+ function parseTinyMCEAllowedList($list) {
+
+     // spaces and tabs carry no meaning; left in place, "a, b" would
+     // register an element literally named " b"
+     $list = str_replace(array(' ', "\t"), '', $list);
+
+     $elements = array();
+     $attributes = array();
+
+     $chunks = preg_split('/(,|[\n\r]+)/', $list);
+     foreach ($chunks as $chunk) {
+         if (empty($chunk)) continue;
+         // separate the element name from its bracketed attribute list;
+         // a chunk without a bracket is a bare element name
+         if (!strpos($chunk, '[')) {
+             $element = $chunk;
+             $attr = false;
+         } else {
+             list($element, $attr) = explode('[', $chunk);
+         }
+         // "*" only carries global attributes, it is not an element
+         if ($element !== '*') $elements[$element] = true;
+         if (!$attr) continue;
+         $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
+         $attr = explode('|', $attr);
+         foreach ($attr as $key) {
+             $attributes["$element.$key"] = true;
+         }
+     }
+
+     return array($elements, $attributes);
+
+ }
+
}
-?>
+
class HTMLPurifier_HTMLModule
{
+
+ // -- Overloadable ----------------------------------------------------
+
/**
* Short unique string identifier of the module
*/
var $name;
- /**
- * Dynamically set integer that specifies when the module was loaded in.
- */
- var $order;
-
/**
* Informally, a list of elements this module changes. Not used in
* any significant way.
*/
function getChildDef($def) {return false;}
+ // -- Convenience -----------------------------------------------------
+
+    /**
+     * Shortcut for declaring a brand-new element in this module.
+     * @param $element Name of the element being added
+     * @param $safe Whether untrusted users may be allowed to use the element
+     * @param $type Content set to register the element in, or false to
+     *              leave it out of every content set
+     * @param $contents Allowed children, written as
+     *              "$content_model_type: $content_model"; a non-string
+     *              value is assigned directly as the element's child
+     * @param $attr_includes Attribute collection(s) the element pulls in
+     * @param $attr Attributes unique to this element
+     * @note See ElementDef for in-depth descriptions of these parameters.
+     * @return Reference to the newly created element definition, so that
+     *         advanced parameters can be set on it afterwards
+     * @protected
+     */
+    function &addElement($element, $safe, $type, $contents, $attr_includes = array(), $attr = array()) {
+        $this->elements[] = $element;
+
+        // break $contents up into its content model type and content model
+        $parsed     = $this->parseContents($contents);
+        $model_type = $parsed[0];
+        $model      = $parsed[1];
+
+        // fold the attribute collections into $attr
+        $this->mergeInAttrIncludes($attr, $attr_includes);
+
+        // a false-y $type means the element joins no content set
+        if ($type) {
+            $this->addElementToContentSet($element, $type);
+        }
+
+        $this->info[$element] = HTMLPurifier_ElementDef::create(
+            $safe, $model, $model_type, $attr
+        );
+
+        // anything that is not a string is used as the child definition as-is
+        if (!is_string($contents)) {
+            $this->info[$element]->child = $contents;
+        }
+
+        return $this->info[$element];
+    }
+
+    /**
+     * Shortcut that registers an empty element definition flagged as
+     * non-standalone. If the module already holds a definition under
+     * that name, an error is triggered and the existing definition is
+     * left untouched.
+     * @param $element Name of element to create
+     * @return Reference to the definition stored under $element
+     */
+    function &addBlankElement($element) {
+        if (isset($this->info[$element])) {
+            // never overwrite a definition the module already has
+            trigger_error("Definition for $element already exists in module, cannot redefine");
+        } else {
+            $this->elements[] = $element;
+            $this->info[$element] = new HTMLPurifier_ElementDef();
+            $this->info[$element]->standalone = false;
+        }
+        return $this->info[$element];
+    }
+
/**
- * Hook method that lets module perform arbitrary operations on
- * HTMLPurifier_HTMLDefinition before the module gets processed.
- * @param $definition Reference to HTMLDefinition being setup
+ * Convenience function that registers an element to a content set
+ * @param $element Name of element to register
+ * @param $type Name of content set (warning: case sensitive, usually
+ * upper-case first letter)
+ * @protected
*/
- function preProcess(&$definition) {}
+    function addElementToContentSet($element, $type) {
+        if (isset($this->content_sets[$type])) {
+            // set already has members: chain the new one on with ' | '
+            $this->content_sets[$type] .= ' | ' . $element;
+        } else {
+            // first member of this content set
+            $this->content_sets[$type] = (string) $element;
+        }
+    }
/**
- * Hook method that lets module perform arbitrary operations
- * on HTMLPurifier_HTMLDefinition after the module gets processed.
- * @param $definition Reference to HTMLDefinition being setup
+ * Convenience function that transforms single-string contents
+ * into separate content model and content model type
+ * @param $contents Allowed children in form of:
+ * "$content_model_type: $content_model"
+ * @note If contents is not a string (e.g. an object), an array of
+ * two nulls will be returned, and the caller needs to take the
+ * original $contents and use it directly.
*/
- function postProcess(&$definition) {}
+    function parseContents($contents) {
+        // non-string contents are handed back as two nulls
+        if (!is_string($contents)) {
+            return array(null, null);
+        }
+
+        // shorthand content model forms
+        $shorthand = array(
+            'Empty'  => array('empty', ''),
+            'Inline' => array('optional', 'Inline | #PCDATA'),
+            'Flow'   => array('optional', 'Flow | #PCDATA'),
+        );
+        if (isset($shorthand[$contents])) {
+            return $shorthand[$contents];
+        }
+
+        // long form: "$content_model_type: $content_model"
+        $parts = explode(':', $contents);
+        $model_type = strtolower(trim($parts[0]));
+        $model = trim($parts[1]);
+        return array($model_type, $model);
+    }
/**
- * Hook method that is called when a module gets registered to
- * the definition.
- * @param $definition Reference to HTMLDefinition being setup
+ * Convenience function that merges a list of attribute includes into
+ * an attribute array.
+ * @param $attr Reference to attr array to modify
+ * @param $attr_includes Array of includes / string include to merge in
*/
- function setup(&$definition) {}
+    function mergeInAttrIncludes(&$attr, $attr_includes) {
+        // normalize the includes to an array: arrays pass through, empty
+        // values become an empty list, any other value becomes a
+        // one-item list
+        if (is_array($attr_includes)) {
+            $includes = $attr_includes;
+        } elseif (empty($attr_includes)) {
+            $includes = array();
+        } else {
+            $includes = array($attr_includes);
+        }
+        // the includes live under index 0, replacing whatever was there
+        $attr[0] = $includes;
+    }
+    /**
+     * Builds a lookup table whose keys are the given values and whose
+     * values are all boolean true. Null entries are left out.
+     * @param $list Array of values to turn into keys
+     * @note When the first argument is a string, all arguments of the
+     *       call are taken as the list instead
+     * @return Lookup array equivalent of list
+     */
+    function makeLookup($list) {
+        if (is_string($list)) {
+            // variadic form: makeLookup('a', 'b', ...)
+            $values = func_get_args();
+        } else {
+            $values = $list;
+        }
+        $lookup = array();
+        foreach ($values as $value) {
+            if ($value !== null) {
+                $lookup[$value] = true;
+            }
+        }
+        return $lookup;
+    }
}
-?>
\ No newline at end of file
{
var $name = 'Bdo';
- var $elements = array('bdo');
- var $content_sets = array('Inline' => 'bdo');
var $attr_collections = array(
'I18N' => array('dir' => false)
);
function HTMLPurifier_HTMLModule_Bdo() {
- $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
- $this->attr_collections['I18N']['dir'] = $dir;
- $this->info['bdo'] = new HTMLPurifier_ElementDef();
- $this->info['bdo']->attr = array(
- 0 => array('Core', 'Lang'),
- 'dir' => $dir, // required
- // The Abstract Module specification has the attribute
- // inclusions wrong for bdo: bdo allows
- // xml:lang too (and we'll toss in lang for good measure,
- // though it is not allowed for XHTML 1.1, this will
- // be managed with a global attribute transform)
+ $bdo =& $this->addElement(
+ 'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
+ array(
+ 'dir' => 'Enum#ltr,rtl', // required
+ // The Abstract Module specification has the attribute
+ // inclusions wrong for bdo: bdo allows Lang
+ )
);
- $this->info['bdo']->content_model = '#PCDATA | Inline';
- $this->info['bdo']->content_model_type = 'optional';
- // provides fallback behavior if dir's missing (dir is required)
- $this->info['bdo']->attr_transform_post['required-dir'] =
- new HTMLPurifier_AttrTransform_BdoDir();
+ $bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();
+
+ $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
}
}
-?>
\ No newline at end of file
<?php
+require_once 'HTMLPurifier/HTMLModule.php';
+
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
var $name = 'CommonAttributes';
'id' => 'ID',
'title' => 'CDATA',
),
- 'Lang' => array(
- 'xml:lang' => false, // see constructor
- ),
+ 'Lang' => array(),
'I18N' => array(
0 => array('Lang'), // proprietary, for xml:lang/lang
),
0 => array('Core', 'I18N')
)
);
-
- function HTMLPurifier_HTMLModule_CommonAttributes() {
- $this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
- }
}
-?>
\ No newline at end of file
{
var $name = 'Edit';
- var $elements = array('del', 'ins');
- var $content_sets = array('Inline' => 'del | ins');
function HTMLPurifier_HTMLModule_Edit() {
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- $this->info[$element]->attr = array(
- 0 => array('Common'),
- 'cite' => 'URI',
- // 'datetime' => 'Datetime' // Datetime not implemented
- );
- // Inline context ! Block context (exclamation mark is
- // separator, see getChildDef for parsing)
- $this->info[$element]->content_model =
- '#PCDATA | Inline ! #PCDATA | Flow';
- // HTML 4.01 specifies that ins/del must not contain block
- // elements when used in an inline context, chameleon is
- // a complicated workaround to acheive this effect
- $this->info[$element]->content_model_type = 'chameleon';
- }
+ $contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
+ $attr = array(
+ 'cite' => 'URI',
+ // 'datetime' => 'Datetime', // not implemented
+ );
+ $this->addElement('del', true, 'Inline', $contents, 'Common', $attr);
+ $this->addElement('ins', true, 'Inline', $contents, 'Common', $attr);
}
+ // HTML 4.01 specifies that ins/del must not contain block
+ // elements when used in an inline context, chameleon is
+ // a complicated workaround to achieve this effect
+
+ // Inline context ! Block context (exclamation mark is
+ // separator, see getChildDef for parsing)
+
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'chameleon') return false;
}
-?>
\ No newline at end of file
{
var $name = 'Hypertext';
- var $elements = array('a');
- var $content_sets = array('Inline' => 'a');
function HTMLPurifier_HTMLModule_Hypertext() {
- $this->info['a'] = new HTMLPurifier_ElementDef();
- $this->info['a']->attr = array(
- 0 => array('Common'),
- // 'accesskey' => 'Character',
- // 'charset' => 'Charset',
- 'href' => 'URI',
- //'hreflang' => 'LanguageCode',
- 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
- 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
- //'tabindex' => 'Number',
- //'type' => 'ContentType',
+ $a =& $this->addElement(
+ 'a', true, 'Inline', 'Inline', 'Common',
+ array(
+ // 'accesskey' => 'Character',
+ // 'charset' => 'Charset',
+ 'href' => 'URI',
+ // 'hreflang' => 'LanguageCode',
+ 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
+ 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
+ // 'tabindex' => 'Number',
+ // 'type' => 'ContentType',
+ )
);
- $this->info['a']->content_model = '#PCDATA | Inline';
- $this->info['a']->content_model_type = 'optional';
- $this->info['a']->excludes = array('a' => true);
+ $a->excludes = array('a' => true);
}
}
-?>
\ No newline at end of file
{
var $name = 'Image';
- var $elements = array('img');
- var $content_sets = array('Inline' => 'img');
function HTMLPurifier_HTMLModule_Image() {
- $this->info['img'] = new HTMLPurifier_ElementDef();
- $this->info['img']->attr = array(
- 0 => array('Common'),
- 'alt' => 'Text',
- 'height' => 'Length',
- 'longdesc' => 'URI',
- 'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
- 'width' => 'Length'
+ $img =& $this->addElement(
+ 'img', true, 'Inline', 'Empty', 'Common',
+ array(
+ 'alt*' => 'Text',
+ 'height' => 'Length',
+ 'longdesc' => 'URI',
+ 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
+ 'width' => 'Length'
+ )
);
- $this->info['img']->content_model_type = 'empty';
- $this->info['img']->attr_transform_post[] =
+ // kind of strange, but splitting things up would be inefficient
+ $img->attr_transform_pre[] =
+ $img->attr_transform_post[] =
new HTMLPurifier_AttrTransform_ImgRequired();
}
}
-?>
\ No newline at end of file
<?php
+require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
+
/**
* XHTML 1.1 Legacy module defines elements that were previously
* deprecated.
// incomplete
var $name = 'Legacy';
- var $elements = array('u', 's', 'strike');
- var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
function HTMLPurifier_HTMLModule_Legacy() {
- // setup new elements
- foreach ($this->elements as $name) {
- $this->info[$name] = new HTMLPurifier_ElementDef();
- // for u, s, strike, as more elements get added, add
- // conditionals as necessary
- $this->info[$name]->content_model = 'Inline | #PCDATA';
- $this->info[$name]->content_model_type = 'optional';
- $this->info[$name]->attr[0] = array('Common');
- }
+
+ $this->addElement('basefont', true, 'Inline', 'Empty', false, array(
+ 'color' => 'Color',
+ 'face' => 'Text', // extremely broad, we should
+ 'size' => 'Text', // tighten it
+ 'id' => 'ID'
+ ));
+ $this->addElement('center', true, 'Block', 'Flow', 'Common');
+ $this->addElement('dir', true, 'Block', 'Required: li', 'Common', array(
+ 'compact' => 'Bool#compact'
+ ));
+ $this->addElement('font', true, 'Inline', 'Inline', array('Core', 'I18N'), array(
+ 'color' => 'Color',
+ 'face' => 'Text', // extremely broad, we should
+ 'size' => 'Text', // tighten it
+ ));
+ $this->addElement('menu', true, 'Block', 'Required: li', 'Common', array(
+ 'compact' => 'Bool#compact'
+ ));
+ $this->addElement('s', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('strike', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('u', true, 'Inline', 'Inline', 'Common');
// setup modifications to old elements
- foreach ($this->non_standalone_elements as $name) {
- $this->info[$name] = new HTMLPurifier_ElementDef();
- $this->info[$name]->standalone = false;
+
+ $align = 'Enum#left,right,center,justify';
+
+ $address =& $this->addBlankElement('address');
+ $address->content_model = 'Inline | #PCDATA | p';
+ $address->content_model_type = 'optional';
+ $address->child = false;
+
+ $blockquote =& $this->addBlankElement('blockquote');
+ $blockquote->content_model = 'Flow | #PCDATA';
+ $blockquote->content_model_type = 'optional';
+ $blockquote->child = false;
+
+ $br =& $this->addBlankElement('br');
+ $br->attr['clear'] = 'Enum#left,all,right,none';
+
+ $caption =& $this->addBlankElement('caption');
+ $caption->attr['align'] = 'Enum#top,bottom,left,right';
+
+ $div =& $this->addBlankElement('div');
+ $div->attr['align'] = $align;
+
+ $dl =& $this->addBlankElement('dl');
+ $dl->attr['compact'] = 'Bool#compact';
+
+ for ($i = 1; $i <= 6; $i++) {
+ $h =& $this->addBlankElement("h$i");
+ $h->attr['align'] = $align;
}
- $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
- $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+ $hr =& $this->addBlankElement('hr');
+ $hr->attr['align'] = $align;
+ $hr->attr['noshade'] = 'Bool#noshade';
+ $hr->attr['size'] = 'Pixels';
+ $hr->attr['width'] = 'Length';
+
+ $img =& $this->addBlankElement('img');
+ $img->attr['align'] = 'Enum#top,middle,bottom,left,right';
+ $img->attr['border'] = 'Pixels';
+ $img->attr['hspace'] = 'Pixels';
+ $img->attr['vspace'] = 'Pixels';
+
+ // figure out this integer business
+
+ $li =& $this->addBlankElement('li');
+ $li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+ $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle';
+
+ $ol =& $this->addBlankElement('ol');
+ $ol->attr['compact'] = 'Bool#compact';
+ $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+ $ol->attr['type'] = 'Enum#s:1,i,I,a,A';
+
+ $p =& $this->addBlankElement('p');
+ $p->attr['align'] = $align;
+
+ $pre =& $this->addBlankElement('pre');
+ $pre->attr['width'] = 'Number';
+
+ // script omitted
+
+ $table =& $this->addBlankElement('table');
+ $table->attr['align'] = 'Enum#left,center,right';
+ $table->attr['bgcolor'] = 'Color';
+
+ $tr =& $this->addBlankElement('tr');
+ $tr->attr['bgcolor'] = 'Color';
+
+ $th =& $this->addBlankElement('th');
+ $th->attr['bgcolor'] = 'Color';
+ $th->attr['height'] = 'Length';
+ $th->attr['nowrap'] = 'Bool#nowrap';
+ $th->attr['width'] = 'Length';
- $this->info['address']->content_model = 'Inline | #PCDATA | p';
- $this->info['address']->content_model_type = 'optional';
- $this->info['address']->child = false;
+ $td =& $this->addBlankElement('td');
+ $td->attr['bgcolor'] = 'Color';
+ $td->attr['height'] = 'Length';
+ $td->attr['nowrap'] = 'Bool#nowrap';
+ $td->attr['width'] = 'Length';
- $this->info['blockquote']->content_model = 'Flow | #PCDATA';
- $this->info['blockquote']->content_model_type = 'optional';
- $this->info['blockquote']->child = false;
+ $ul =& $this->addBlankElement('ul');
+ $ul->attr['compact'] = 'Bool#compact';
+ $ul->attr['type'] = 'Enum#square,disc,circle';
}
}
-?>
\ No newline at end of file
{
var $name = 'List';
- var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
// According to the abstract schema, the List content set is a fully formed
// one or more expr, but it invariably occurs in an optional declaration
// Furthermore, the actual XML Schema may disagree. Regardless,
// we don't have support for such nested expressions without using
// the incredibly inefficient and draconic Custom ChildDef.
- var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
+
+ var $content_sets = array('Flow' => 'List');
function HTMLPurifier_HTMLModule_List() {
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- $this->info[$element]->attr = array(0 => array('Common'));
- if ($element == 'li' || $element == 'dd') {
- $this->info[$element]->content_model = '#PCDATA | Flow';
- $this->info[$element]->content_model_type = 'optional';
- } elseif ($element == 'ol' || $element == 'ul') {
- $this->info[$element]->content_model = 'li';
- $this->info[$element]->content_model_type = 'required';
- }
- }
- $this->info['dt']->content_model = '#PCDATA | Inline';
- $this->info['dt']->content_model_type = 'optional';
- $this->info['dl']->content_model = 'dt | dd';
- $this->info['dl']->content_model_type = 'required';
- // this could be a LOT more robust
- $this->info['li']->auto_close = array('li' => true);
+ $this->addElement('ol', true, 'List', 'Required: li', 'Common');
+ $this->addElement('ul', true, 'List', 'Required: li', 'Common');
+ $this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
+
+ $this->addElement('li', true, false, 'Flow', 'Common');
+
+ $this->addElement('dd', true, false, 'Flow', 'Common');
+ $this->addElement('dt', true, false, 'Inline', 'Common');
}
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * Module that contributes the plain lang attribute, typed as
+ * LanguageCode, to the Lang attribute collection. It defines no
+ * elements of its own.
+ */
+class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
+{
+ var $name = 'NonXMLCommonAttributes';
+
+ // entries declared here are keyed by attribute collection name
+ var $attr_collections = array(
+ 'Lang' => array(
+ 'lang' => 'LanguageCode',
+ )
+ );
+}
+
{
var $name = 'Presentation';
- var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
- var $content_sets = array(
- 'Block' => 'hr',
- 'Inline' => 'b | big | i | small | sub | sup | tt'
- );
function HTMLPurifier_HTMLModule_Presentation() {
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- $this->info[$element]->attr = array(0 => array('Common'));
- if ($element == 'hr') {
- $this->info[$element]->content_model_type = 'empty';
- } else {
- $this->info[$element]->content_model = '#PCDATA | Inline';
- $this->info[$element]->content_model_type = 'optional';
- }
- }
+ $this->addElement('b', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('big', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('hr', true, 'Block', 'Empty', 'Common');
+ $this->addElement('i', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('small', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('sub', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('sup', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('tt', true, 'Inline', 'Inline', 'Common');
}
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Ruby Annotation Module, defines elements that indicate
+ * short runs of text alongside base text for annotation or pronunciation.
+ */
+class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
+{
+
+ var $name = 'Ruby';
+
+ /**
+ * Registers ruby and its helper elements. Only ruby itself joins the
+ * Inline content set; every other element is registered with a false
+ * content set and can therefore only appear through the content
+ * models declared below.
+ */
+ function HTMLPurifier_HTMLModule_Ruby() {
+ // ruby holds either the simple form (rb followed by rt, optionally
+ // wrapped in rp) or the complex form (rbc, rtc and an optional
+ // second rtc)
+ $this->addElement('ruby', true, 'Inline',
+ 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
+ 'Common');
+ // containers used by the complex form
+ $this->addElement('rbc', true, false, 'Required: rb', 'Common');
+ $this->addElement('rtc', true, false, 'Required: rt', 'Common');
+ // rb and rt take inline content but list ruby in their excludes
+ // (presumably to forbid nested ruby -- confirm against ElementDef)
+ $rb =& $this->addElement('rb', true, false, 'Inline', 'Common');
+ $rb->excludes = array('ruby' => true);
+ $rt =& $this->addElement('rt', true, false, 'Inline', 'Common', array('rbspan' => 'Number'));
+ $rt->excludes = array('ruby' => true);
+ // rp may only contain character data
+ $this->addElement('rp', true, false, 'Optional: #PCDATA', 'Common');
+ }
+
+}
+
WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
-Usage:
-
-require_once 'HTMLPurifier/HTMLModule/Scripting.php';
-$def =& $config->getHTMLDefinition(true); // get the raw version
-$def->manager->addModule('Scripting');
-
-This must come before any other calls to getHTMLDefinition()
-
*/
/**
// blockquote's custom definition (we would use it but
// blockquote's contents are optional while noscript's contents
// are required)
+
+ // TODO: convert this to new syntax, main problem is getting
+ // both content sets working
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
+ $this->info[$element]->safe = false;
}
$this->info['noscript']->attr = array( 0 => array('Common') );
$this->info['noscript']->content_model = 'Heading | List | Block';
);
$this->info['script']->content_model = '#PCDATA';
$this->info['script']->content_model_type = 'optional';
+ $this->info['script']->attr_transform_pre['type'] =
$this->info['script']->attr_transform_post['type'] =
new HTMLPurifier_AttrTransform_ScriptRequired();
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
{
var $name = 'Tables';
- var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
- 'colgroup', 'tbody', 'thead', 'tfoot');
- var $content_sets = array('Block' => 'table');
function HTMLPurifier_HTMLModule_Tables() {
- foreach ($this->elements as $e) {
- $this->info[$e] = new HTMLPurifier_ElementDef();
- $this->info[$e]->attr = array(0 => array('Common'));
- $attr =& $this->info[$e]->attr;
- if ($e == 'caption') continue;
- if ($e == 'table'){
- $attr['border'] = 'Pixels';
- $attr['cellpadding'] = 'Length';
- $attr['cellspacing'] = 'Length';
- $attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
- 'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
- 'vsides', 'box', 'border'
- ), false);
- $attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
- 'none', 'groups', 'rows', 'cols', 'all'
- ), false);
- $attr['summary'] = 'Text';
- $attr['width'] = 'Length';
- continue;
- }
- if ($e == 'col' || $e == 'colgroup') {
- $attr['span'] = 'Number';
- $attr['width'] = 'MultiLength';
- }
- if ($e == 'td' || $e == 'th') {
- $attr['abbr'] = 'Text';
- $attr['colspan'] = 'Number';
- $attr['rowspan'] = 'Number';
- }
- $attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
- 'left', 'center', 'right', 'justify', 'char'
- ), false);
- $attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
- 'top', 'middle', 'bottom', 'baseline'
- ), false);
- $attr['charoff'] = 'Length';
- }
- $this->info['caption']->content_model = '#PCDATA | Inline';
- $this->info['caption']->content_model_type = 'optional';
- // Is done directly because it doesn't leverage substitution
- // mechanisms. True model is:
- // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
- $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
+ $this->addElement('caption', true, false, 'Inline', 'Common');
- $this->info['td']->content_model =
- $this->info['th']->content_model = '#PCDATA | Flow';
- $this->info['td']->content_model_type =
- $this->info['th']->content_model_type = 'optional';
+ $this->addElement('table', true, 'Block',
+ new HTMLPurifier_ChildDef_Table(), 'Common',
+ array(
+ 'border' => 'Pixels',
+ 'cellpadding' => 'Length',
+ 'cellspacing' => 'Length',
+ 'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border',
+ 'rules' => 'Enum#none,groups,rows,cols,all',
+ 'summary' => 'Text',
+ 'width' => 'Length'
+ )
+ );
- $this->info['tr']->content_model = 'td | th';
- $this->info['tr']->content_model_type = 'required';
+ // common attributes
+ $cell_align = array(
+ 'align' => 'Enum#left,center,right,justify,char',
+ 'charoff' => 'Length',
+ 'valign' => 'Enum#top,middle,bottom,baseline',
+ );
- $this->info['col']->content_model_type = 'empty';
+ $cell_t = array_merge(
+ array(
+ 'abbr' => 'Text',
+ 'colspan' => 'Number',
+ 'rowspan' => 'Number',
+ ),
+ $cell_align
+ );
+ $this->addElement('td', true, false, 'Flow', 'Common', $cell_t);
+ $this->addElement('th', true, false, 'Flow', 'Common', $cell_t);
- $this->info['colgroup']->content_model = 'col';
- $this->info['colgroup']->content_model_type = 'optional';
+ $this->addElement('tr', true, false, 'Required: td | th', 'Common', $cell_align);
- $this->info['tbody']->content_model =
- $this->info['thead']->content_model =
- $this->info['tfoot']->content_model = 'tr';
- $this->info['tbody']->content_model_type =
- $this->info['thead']->content_model_type =
- $this->info['tfoot']->content_model_type = 'required';
+ $cell_col = array_merge(
+ array(
+ 'span' => 'Number',
+ 'width' => 'MultiLength',
+ ),
+ $cell_align
+ );
+ $this->addElement('col', true, false, 'Empty', 'Common', $cell_col);
+ $this->addElement('colgroup', true, false, 'Optional: col', 'Common', $cell_col);
+
+ $this->addElement('tbody', true, false, 'Required: tr', 'Common', $cell_align);
+ $this->addElement('thead', true, false, 'Required: tr', 'Common', $cell_align);
+ $this->addElement('tfoot', true, false, 'Required: tr', 'Common', $cell_align);
}
}
-?>
\ No newline at end of file
{
var $name = 'Target';
- var $elements = array('a');
function HTMLPurifier_HTMLModule_Target() {
- foreach ($this->elements as $e) {
- $this->info[$e] = new HTMLPurifier_ElementDef();
- $this->info[$e]->standalone = false;
- $this->info[$e]->attr = array(
+ $elements = array('a');
+ foreach ($elements as $name) {
+ $e =& $this->addBlankElement($name);
+ $e->attr = array(
'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget()
);
}
}
-?>
\ No newline at end of file
* - Block Structural (div, p)
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
* - Inline Structural (br, span)
- * We have elected not to follow suite, but this may change.
+ * This module, functionally, does not distinguish between these
+ * sub-modules, but the code is internally structured to reflect
+ * these distinctions.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
var $name = 'Text';
-
- var $elements = array('abbr', 'acronym', 'address', 'blockquote',
- 'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
- 'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
- 'var', 'nolink', 'tex', 'algebra'); //moodle modification
-
var $content_sets = array(
- 'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
- 'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification
- 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
'Flow' => 'Heading | Block | Inline'
);
function HTMLPurifier_HTMLModule_Text() {
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- // attributes
- if ($element == 'br') {
- $this->info[$element]->attr = array(0 => array('Core'));
- } elseif ($element == 'blockquote' || $element == 'q') {
- $this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
- } else {
- $this->info[$element]->attr = array(0 => array('Common'));
- }
- // content models
- if ($element == 'br') {
- $this->info[$element]->content_model_type = 'empty';
- } elseif ($element == 'blockquote') {
- $this->info[$element]->content_model = 'Heading | Block | List';
- $this->info[$element]->content_model_type = 'optional';
- } elseif ($element == 'div') {
- $this->info[$element]->content_model = '#PCDATA | Flow';
- $this->info[$element]->content_model_type = 'optional';
- } else {
- $this->info[$element]->content_model = '#PCDATA | Inline';
- $this->info[$element]->content_model_type = 'optional';
- }
- }
- // SGML permits exclusions for all descendants, but this is
- // not possible with DTDs or XML Schemas. W3C has elected to
- // use complicated compositions of content_models to simulate
- // exclusion for children, but we go the simpler, SGML-style
- // route of flat-out exclusions. Note that the Abstract Module
- // is blithely unaware of such distinctions.
- $this->info['pre']->excludes = array_flip(array(
- 'img', 'big', 'small',
- 'object', 'applet', 'font', 'basefont' // generally not allowed
- ));
- $this->info['p']->auto_close = array_flip(array(
- 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
- 'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification
- ));
+
+ // Inline Phrasal -------------------------------------------------
+ $this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('acronym', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('cite', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('code', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('dfn', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('em', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('kbd', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
+ $this->addElement('samp', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('strong', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('var', true, 'Inline', 'Inline', 'Common');
+
+ // Inline Structural ----------------------------------------------
+ $this->addElement('span', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('br', true, 'Inline', 'Empty', 'Core');
+
+ // Moodle specific elements - start
+ $this->addElement('nolink', true, 'Inline', 'Flow');
+ $this->addElement('tex', true, 'Inline', 'Flow');
+ $this->addElement('algebra', true, 'Inline', 'Flow');
+ $this->addElement('lang', true, 'Inline', 'Flow', 'I18N');
+ // Moodle specific elements - end
+
+ // Block Phrasal --------------------------------------------------
+ $this->addElement('address', true, 'Block', 'Inline', 'Common');
+ $this->addElement('blockquote', true, 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
+ $pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common');
+ $pre->excludes = $this->makeLookup(
+ 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
+ $this->addElement('h1', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h2', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h3', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h4', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h5', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h6', true, 'Heading', 'Inline', 'Common');
+
+ // Block Structural -----------------------------------------------
+ $this->addElement('p', true, 'Block', 'Inline', 'Common');
+ $this->addElement('div', true, 'Block', 'Flow', 'Common');
+
}
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'TidyLevel', 'medium', 'string', '
+<p>General level of cleanliness the Tidy module should enforce.
+There are four allowed values:</p>
+<dl>
+ <dt>none</dt>
+ <dd>No extra tidying should be done</dd>
+ <dt>light</dt>
+ <dd>Only fix elements that would be discarded otherwise due to
+ lack of support in doctype</dd>
+ <dt>medium</dt>
+ <dd>Enforce best practices</dd>
+ <dt>heavy</dt>
+ <dd>Transform all deprecated elements and attributes to standards
+ compliant equivalents</dd>
+</dl>
+<p>This directive has been available since 2.0.0</p>
+' );
+HTMLPurifier_ConfigSchema::defineAllowedValues(
+ 'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy')
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'TidyAdd', array(), 'lookup', '
+Fixes to add to the default set of Tidy fixes as per your level. This
+directive has been available since 2.0.0.
+' );
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'TidyRemove', array(), 'lookup', '
+Fixes to remove from the default set of Tidy fixes as per your level. This
+directive has been available since 2.0.0.
+' );
+
+/**
+ * Abstract class for a set of proprietary modules that clean up (tidy)
+ * poorly written HTML.
+ */
+class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
+{
+
+ /**
+ * List of supported levels. Index zero is a special case "no fixes"
+ * level.
+ */
+ var $levels = array(0 => 'none', 'light', 'medium', 'heavy');
+
+ /**
+ * Default level to place all fixes in. Disabled by default
+ */
+ var $defaultLevel = null;
+
+ /**
+ * Lists of fixes used by getFixesForLevel(). Format is:
+ * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
+ */
+ var $fixesForLevel = array(
+ 'light' => array(),
+ 'medium' => array(),
+ 'heavy' => array()
+ );
+
+    /**
+     * Lazily builds the module: creates every available fix, decides
+     * which of them are active for the given configuration and hands
+     * the remaining ones to populate().
+     * @param $config Configuration object; the HTML.TidyLevel,
+     *        HTML.TidyAdd and HTML.TidyRemove directives are read from it
+     * @todo Wildcard matching and error reporting when an added or
+     *       subtracted fix has no effect.
+     */
+    function construct($config) {
+
+        // create fixes, initialize fixesForLevel
+        $all_fixes = $this->makeFixes();
+        $this->makeFixesForLevel($all_fixes);
+
+        // fixes switched on by the configured level
+        $enabled_by_level = $this->getFixesForLevel($config->get('HTML', 'TidyLevel'));
+
+        // custom fix declarations, used as lookups keyed by fix name
+        $forced_on  = $config->get('HTML', 'TidyAdd');
+        $forced_off = $config->get('HTML', 'TidyRemove');
+
+        // needs to be refactored a little to implement globbing
+        foreach ($all_fixes as $fix_name => $unused) {
+            $wanted = isset($forced_on[$fix_name]) || isset($enabled_by_level[$fix_name]);
+            if (isset($forced_off[$fix_name]) || !$wanted) {
+                // explicitly removed, or neither added nor part of the level
+                unset($all_fixes[$fix_name]);
+            }
+        }
+
+        // populate this module with necessary fixes
+        $this->populate($all_fixes);
+
+    }
+
+    /**
+     * Retrieves all fixes per a level, returning fixes for that specific
+     * level as well as all levels below it.
+     * @param $level String level identifier, see $levels for valid values
+     * @return Lookup table of fixes (fix name => true); an empty array for
+     *         the "no fixes" level or, after an E_USER_WARNING, for an
+     *         unrecognized level
+     */
+    function getFixesForLevel($level) {
+        // strict comparison: a non-string such as true must not be
+        // mistaken for the special "no fixes" level
+        if ($level === $this->levels[0]) {
+            return array();
+        }
+        // collect every level up to and including the requested one
+        $activated_levels = array();
+        $found = false;
+        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
+            $activated_levels[] = $this->levels[$i];
+            if ($this->levels[$i] === $level) {
+                $found = true;
+                break;
+            }
+        }
+        if (!$found) {
+            trigger_error(
+                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
+                E_USER_WARNING
+            );
+            return array();
+        }
+        // flatten the per-level lists into one lookup; a separate loop
+        // variable keeps the $level argument intact
+        $ret = array();
+        foreach ($activated_levels as $activated) {
+            foreach ($this->fixesForLevel[$activated] as $fix) {
+                $ret[$fix] = true;
+            }
+        }
+        return $ret;
+    }
+
+    /**
+     * Fills the $fixesForLevel member from the fixes array: when a
+     * $defaultLevel is set, every fix name is assigned to that level.
+     * Subclasses may overload this, rely on $defaultLevel, or leave
+     * $defaultLevel unset so that nothing happens here.
+     * @param $fixes Associative array of fix name => fix implementation
+     */
+    function makeFixesForLevel($fixes) {
+        if (!isset($this->defaultLevel)) return;
+        $bucket = $this->defaultLevel;
+        if (isset($this->fixesForLevel[$bucket])) {
+            $this->fixesForLevel[$bucket] = array_keys($fixes);
+            return;
+        }
+        // the configured default is not one of the known level buckets
+        trigger_error(
+            'Default level ' . $bucket . ' does not exist',
+            E_USER_ERROR
+        );
+    }
+
+ /**
+ * Populates the module with transforms and other special-case code
+ * based on a list of fixes passed to it
+ * @param $fixes Associative array of fix name => fix implementation;
+ * each name is decoded by getFixType() to decide where the fix goes
+ */
+ function populate($fixes) {
+ foreach ($fixes as $name => $fix) {
+ // determine what the fix is for
+ list($type, $params) = $this->getFixType($name);
+ switch ($type) {
+ case 'attr_transform_pre':
+ case 'attr_transform_post':
+ $attr = $params['attr'];
+ if (isset($params['element'])) {
+ // element-specific transform: reuse this module's entry for
+ // the element, or create a blank one if there is none yet
+ $element = $params['element'];
+ if (empty($this->info[$element])) {
+ $e =& $this->addBlankElement($element);
+ } else {
+ $e =& $this->info[$element];
+ }
+ } else {
+ // no element named: store on the module itself, in its
+ // info_attr_transform_pre / info_attr_transform_post member
+ $type = "info_$type";
+ $e =& $this;
+ }
+ // bind by reference so the assignment below writes into the
+ // target's own transform array rather than a copy
+ $f =& $e->$type;
+ $f[$attr] = $fix;
+ break;
+ case 'tag_transform':
+ // tag transforms are registered on the module, keyed by element name
+ $this->info_tag_transform[$params['element']] = $fix;
+ break;
+ case 'child':
+ case 'content_model_type':
+ // the fix value replaces the property of the same name on the
+ // element's entry in this module
+ $element = $params['element'];
+ if (empty($this->info[$element])) {
+ $e =& $this->addBlankElement($element);
+ } else {
+ $e =& $this->info[$element];
+ }
+ $e->$type = $fix;
+ break;
+ default:
+ trigger_error("Fix type $type not supported", E_USER_ERROR);
+ break;
+ }
+ }
+ }
+
+    /**
+     * Decodes a fix name of the form "element@attr#property" (every
+     * part optional) into the kind of fix it denotes plus the
+     * parameters that go with it.
+     * @param $name String name of fix
+     * @return array(string $fix_type, array $fix_parameters)
+     * @note $fix_parameters is type dependent, see populate() for usage
+     *       of these parameters
+     */
+    function getFixType($name) {
+        // peel off "#property" first, then "@attr"; the rest is the element
+        $property = null;
+        $attr = null;
+        if (strpos($name, '#') !== false) {
+            list($name, $property) = explode('#', $name);
+        }
+        if (strpos($name, '@') !== false) {
+            list($name, $attr) = explode('@', $name);
+        }
+
+        $params = array();
+        if ($name !== '') $params['element'] = $name;
+
+        // an attribute part makes this an attribute transform; the
+        // property picks pre/post and defaults to "pre"
+        if (!is_null($attr)) {
+            $params['attr'] = $attr;
+            $suffix = is_null($property) ? 'pre' : $property;
+            return array('attr_transform_' . $suffix, $params);
+        }
+
+        // a bare element name is a tag transform
+        if (is_null($property)) {
+            return array('tag_transform', $params);
+        }
+
+        // otherwise the property itself names the fix type
+        return array($property, $params);
+
+    }
+
+    /**
+     * Defines all fixes the module will perform in a compact
+     * associative array of fix name to fix implementation.
+     * The base implementation defines no fixes and returns an empty
+     * array, so construct() and makeFixesForLevel() always receive an
+     * array even when a subclass does not override this method.
+     * @abstract
+     * @return Associative array of fix name => fix implementation
+     */
+    function makeFixes() {
+        return array();
+    }
+
+}
+
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule/Tidy.php';
+
+/**
+ * Tidy module named 'Tidy_Proprietary'. Its fixes default to the
+ * 'light' level, but it currently defines no fixes at all.
+ */
+class HTMLPurifier_HTMLModule_Tidy_Proprietary extends
+ HTMLPurifier_HTMLModule_Tidy
+{
+
+ var $name = 'Tidy_Proprietary';
+ var $defaultLevel = 'light';
+
+ /**
+ * @return Empty array: no fixes are defined yet
+ */
+ function makeFixes() {
+ return array();
+ }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule/Tidy.php';
+require_once 'HTMLPurifier/AttrTransform/Lang.php';
+
+/**
+ * Tidy module named 'Tidy_XHTML'. It declares a single fix, a
+ * module-wide (no element named) transform for the lang attribute,
+ * placed in the 'medium' level.
+ */
+class HTMLPurifier_HTMLModule_Tidy_XHTML extends
+    HTMLPurifier_HTMLModule_Tidy
+{
+
+    var $name = 'Tidy_XHTML';
+    var $defaultLevel = 'medium';
+
+    /**
+     * @return Associative array of fix name => fix implementation
+     */
+    function makeFixes() {
+        return array(
+            '@lang' => new HTMLPurifier_AttrTransform_Lang()
+        );
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule/Tidy.php';
+
+require_once 'HTMLPurifier/TagTransform/Simple.php';
+require_once 'HTMLPurifier/TagTransform/Font.php';
+
+require_once 'HTMLPurifier/AttrTransform/BgColor.php';
+require_once 'HTMLPurifier/AttrTransform/BoolToCSS.php';
+require_once 'HTMLPurifier/AttrTransform/Border.php';
+require_once 'HTMLPurifier/AttrTransform/Name.php';
+require_once 'HTMLPurifier/AttrTransform/Length.php';
+require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
+require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
+
+/**
+ * Shared base for the Tidy_Transitional and Tidy_Strict modules: it
+ * declares the fixes that turn deprecated elements and attributes into
+ * other elements and inline CSS. It sets neither $name nor
+ * $defaultLevel itself; the subclasses in this file do.
+ */
+class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
+    HTMLPurifier_HTMLModule_Tidy
+{
+
+    /**
+     * @return Associative array of fix name => fix implementation; names
+     *         follow the "element@attr" convention parsed by getFixType()
+     */
+    function makeFixes() {
+
+        $r = array();
+
+        // == deprecated tag transforms ===================================
+
+        $r['font']   = new HTMLPurifier_TagTransform_Font();
+        $r['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
+        $r['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
+        $r['center'] = new HTMLPurifier_TagTransform_Simple('div',  'text-align:center;');
+        $r['u']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;');
+        $r['s']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
+        $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
+
+        // == deprecated attribute transforms =============================
+
+        $r['caption@align'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
+                // we're following IE's behavior, not Firefox's, due
+                // to the fact that no one supports caption-side:right,
+                // W3C included (with CSS 2.1). This is a slightly
+                // unreasonable attribute!
+                'left'   => 'text-align:left;',
+                'right'  => 'text-align:right;',
+                'top'    => 'caption-side:top;',
+                'bottom' => 'caption-side:bottom;' // not supported by IE
+            ));
+
+        // @align for img -------------------------------------------------
+        $r['img@align'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
+                'left'   => 'float:left;',
+                'right'  => 'float:right;',
+                'top'    => 'vertical-align:top;',
+                'middle' => 'vertical-align:middle;',
+                'bottom' => 'vertical-align:baseline;',
+            ));
+
+        // @align for table -----------------------------------------------
+        $r['table@align'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
+                'left'   => 'float:left;',
+                'center' => 'margin-left:auto;margin-right:auto;',
+                'right'  => 'float:right;'
+            ));
+
+        // @align for hr -----------------------------------------------
+        $r['hr@align'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
+                // we use both text-align and margin because these work
+                // for different browsers (IE and Firefox, respectively)
+                // and the melange makes for a pretty cross-compatible
+                // solution
+                'left'   => 'margin-left:0;margin-right:auto;text-align:left;',
+                'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
+                'right'  => 'margin-left:auto;margin-right:0;text-align:right;'
+            ));
+
+        // @align for h1, h2, h3, h4, h5, h6, p, div ----------------------
+        // {{{
+            $align_lookup = array();
+            $align_values = array('left', 'right', 'center', 'justify');
+            foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
+        // }}}
+        $r['h1@align'] =
+        $r['h2@align'] =
+        $r['h3@align'] =
+        $r['h4@align'] =
+        $r['h5@align'] =
+        $r['h6@align'] =
+        $r['p@align']  =
+        $r['div@align'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);
+
+        // @bgcolor for table, tr, td, th ---------------------------------
+        // tr is listed in the heading above and was handled by the
+        // TransformToStrict module this class replaces, so it is
+        // registered here alongside the other table elements
+        $r['table@bgcolor'] =
+        $r['tr@bgcolor'] =
+        $r['td@bgcolor'] =
+        $r['th@bgcolor'] =
+            new HTMLPurifier_AttrTransform_BgColor();
+
+        // @border for img ------------------------------------------------
+        $r['img@border'] = new HTMLPurifier_AttrTransform_Border();
+
+        // @clear for br --------------------------------------------------
+        $r['br@clear'] =
+            new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
+                'left'  => 'clear:left;',
+                'right' => 'clear:right;',
+                'all'   => 'clear:both;',
+                'none'  => 'clear:none;',
+            ));
+
+        // @height for td, th ---------------------------------------------
+        $r['td@height'] =
+        $r['th@height'] =
+            new HTMLPurifier_AttrTransform_Length('height');
+
+        // @hspace for img ------------------------------------------------
+        $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');
+
+        // @name for img, a -----------------------------------------------
+        $r['img@name'] =
+        $r['a@name'] = new HTMLPurifier_AttrTransform_Name();
+
+        // @noshade for hr ------------------------------------------------
+        // this transformation is not precise but often good enough.
+        // different browsers use different styles to designate noshade
+        $r['hr@noshade'] =
+            new HTMLPurifier_AttrTransform_BoolToCSS(
+                'noshade',
+                'color:#808080;background-color:#808080;border:0;'
+            );
+
+        // @nowrap for td, th ---------------------------------------------
+        $r['td@nowrap'] =
+        $r['th@nowrap'] =
+            new HTMLPurifier_AttrTransform_BoolToCSS(
+                'nowrap',
+                'white-space:nowrap;'
+            );
+
+        // @size for hr  --------------------------------------------------
+        $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');
+
+        // @type for li, ol, ul -------------------------------------------
+        // {{{
+            $ul_types = array(
+                'disc'   => 'list-style-type:disc;',
+                'square' => 'list-style-type:square;',
+                'circle' => 'list-style-type:circle;'
+            );
+            $ol_types = array(
+                '1'   => 'list-style-type:decimal;',
+                'i'   => 'list-style-type:lower-roman;',
+                'I'   => 'list-style-type:upper-roman;',
+                'a'   => 'list-style-type:lower-alpha;',
+                'A'   => 'list-style-type:upper-alpha;'
+            );
+            $li_types = $ul_types + $ol_types;
+        // }}}
+
+        $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
+        $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
+        $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);
+
+        // @vspace for img ------------------------------------------------
+        $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');
+
+        // @width for hr, td, th ------------------------------------------
+        $r['td@width'] =
+        $r['th@width'] =
+        $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width');
+
+        return $r;
+
+    }
+
+}
+
+/**
+ * Variant of the XHTMLAndHTML4 fix set named 'Tidy_Transitional'; its
+ * $defaultLevel places every inherited fix in the 'heavy' level.
+ */
+class HTMLPurifier_HTMLModule_Tidy_Transitional extends
+ HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
+{
+ var $name = 'Tidy_Transitional';
+ var $defaultLevel = 'heavy';
+}
+
+/**
+ * Variant of the XHTMLAndHTML4 fix set named 'Tidy_Strict'; its
+ * $defaultLevel places every inherited fix in the 'light' level.
+ */
+class HTMLPurifier_HTMLModule_Tidy_Strict extends
+ HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
+{
+ var $name = 'Tidy_Strict';
+ var $defaultLevel = 'light';
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule/Tidy.php';
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
+/**
+ * Tidy module named 'Tidy_XHTMLStrict'. Its single fix, placed in the
+ * 'light' level, sets the content model type of blockquote to
+ * 'strictblockquote'; getChildDef() supplies the matching child
+ * definition for that type.
+ */
+class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
+    HTMLPurifier_HTMLModule_Tidy
+{
+
+    var $name = 'Tidy_XHTMLStrict';
+    var $defaultLevel = 'light';
+    var $defines_child_def = true;
+
+    /**
+     * @return Associative array of fix name => fix implementation
+     */
+    function makeFixes() {
+        return array(
+            'blockquote#content_model_type' => 'strictblockquote'
+        );
+    }
+
+    /**
+     * @param $def Element definition whose content_model_type and
+     *        content_model are inspected
+     * @return A StrictBlockquote child definition for the
+     *         'strictblockquote' type, false for any other type
+     */
+    function getChildDef($def) {
+        if ($def->content_model_type == 'strictblockquote') {
+            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+        }
+        return false;
+    }
+
+}
+
+++ /dev/null
-<?php
-
-require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
-
-require_once 'HTMLPurifier/TagTransform/Simple.php';
-require_once 'HTMLPurifier/TagTransform/Center.php';
-require_once 'HTMLPurifier/TagTransform/Font.php';
-
-require_once 'HTMLPurifier/AttrTransform/Lang.php';
-require_once 'HTMLPurifier/AttrTransform/BgColor.php';
-require_once 'HTMLPurifier/AttrTransform/BoolToCSS.php';
-require_once 'HTMLPurifier/AttrTransform/Border.php';
-require_once 'HTMLPurifier/AttrTransform/Name.php';
-require_once 'HTMLPurifier/AttrTransform/Length.php';
-require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
-require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
-
-/**
- * Proprietary module that transforms deprecated elements into Strict
- * HTML (see HTML 4.01 and XHTML 1.0) when possible.
- */
-
-class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
-{
-
- var $name = 'TransformToStrict';
-
- // we're actually modifying these elements, not defining them
- var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
- 'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr', 'br',
- 'caption', 'ul', 'ol', 'li');
-
- var $info_tag_transform = array(
- // placeholders, see constructor for definitions
- 'font' => false,
- 'menu' => false,
- 'dir' => false,
- 'center'=> false
- );
-
- var $attr_collections = array(
- 'Lang' => array(
- 'lang' => false // placeholder
- )
- );
-
- var $info_attr_transform_post = array(
- 'lang' => false // placeholder
- );
-
- function HTMLPurifier_HTMLModule_TransformToStrict() {
-
- // behavior with transformations when there's another CSS property
- // working on it is interesting: the CSS will *always* override
- // the deprecated attribute, whereas an inline CSS declaration will
- // override the corresponding declaration in, say, an external
- // stylesheet. This behavior won't affect most people, but it
- // does represent an operational difference we CANNOT fix.
-
- // deprecated tag transforms
- $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
- $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
-
- foreach ($this->elements as $name) {
- $this->info[$name] = new HTMLPurifier_ElementDef();
- $this->info[$name]->standalone = false;
- }
-
- // deprecated attribute transforms
-
- // align battery
- $align_lookup = array();
- $align_values = array('left', 'right', 'center', 'justify');
- foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
- $this->info['h1']->attr_transform_pre['align'] =
- $this->info['h2']->attr_transform_pre['align'] =
- $this->info['h3']->attr_transform_pre['align'] =
- $this->info['h4']->attr_transform_pre['align'] =
- $this->info['h5']->attr_transform_pre['align'] =
- $this->info['h6']->attr_transform_pre['align'] =
- $this->info['p'] ->attr_transform_pre['align'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);
-
- // xml:lang <=> lang mirroring, implement in TransformToStrict,
- // this is overridden in TransformToXHTML11
- $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
- $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
-
- // this should not be applied to XHTML 1.0 Transitional, ONLY
- // XHTML 1.0 Strict. We may need three classes
- $this->info['blockquote']->content_model_type = 'strictblockquote';
- $this->info['blockquote']->child = false; // recalculate please!
-
- $this->info['table']->attr_transform_pre['bgcolor'] =
- $this->info['tr']->attr_transform_pre['bgcolor'] =
- $this->info['td']->attr_transform_pre['bgcolor'] =
- $this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor();
-
- $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
-
- $this->info['img']->attr_transform_pre['name'] =
- $this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name();
-
- $this->info['td']->attr_transform_pre['width'] =
- $this->info['th']->attr_transform_pre['width'] =
- $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width');
-
- $this->info['td']->attr_transform_pre['nowrap'] =
- $this->info['th']->attr_transform_pre['nowrap'] = new HTMLPurifier_AttrTransform_BoolToCSS('nowrap', 'white-space:nowrap;');
-
- $this->info['td']->attr_transform_pre['height'] =
- $this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height');
-
- $this->info['img']->attr_transform_pre['hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');
- $this->info['img']->attr_transform_pre['vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');
-
- $this->info['hr']->attr_transform_pre['size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');
-
- // this transformation is not precise but often good enough.
- // different browsers use different styles to designate noshade
- $this->info['hr']->attr_transform_pre['noshade'] = new HTMLPurifier_AttrTransform_BoolToCSS('noshade', 'color:#808080;background-color:#808080;border: 0;');
-
- $this->info['br']->attr_transform_pre['clear'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
- 'left' => 'clear:left;',
- 'right' => 'clear:right;',
- 'all' => 'clear:both;',
- 'none' => 'clear:none;',
- ));
-
- // this is a slightly unreasonable attribute
- $this->info['caption']->attr_transform_pre['align'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
- // we're following IE's behavior, not Firefox's, due
- // to the fact that no one supports caption-side:right,
- // W3C included (with CSS 2.1)
- 'left' => 'text-align:left;',
- 'right' => 'text-align:right;',
- 'top' => 'caption-side:top;',
- 'bottom' => 'caption-side:bottom;' // not supported by IE
- ));
-
- $this->info['table']->attr_transform_pre['align'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
- 'left' => 'float:left;',
- 'center' => 'margin-left:auto;margin-right:auto;',
- 'right' => 'float:right;'
- ));
-
- $this->info['img']->attr_transform_pre['align'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
- 'left' => 'float:left;',
- 'right' => 'float:right;',
- 'top' => 'vertical-align:top;',
- 'middle' => 'vertical-align:middle;',
- 'bottom' => 'vertical-align:baseline;',
- ));
-
- $this->info['hr']->attr_transform_pre['align'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
- 'left' => 'margin-left:0;margin-right:auto;text-align:left;',
- 'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
- 'right' => 'margin-left:auto;margin-right:0;text-align:right;'
- ));
-
- $ul_types = array(
- 'disc' => 'list-style-type:disc;',
- 'square' => 'list-style-type:square;',
- 'circle' => 'list-style-type:circle;'
- );
- $ol_types = array(
- '1' => 'list-style-type:decimal;',
- 'i' => 'list-style-type:lower-roman;',
- 'I' => 'list-style-type:upper-roman;',
- 'a' => 'list-style-type:lower-alpha;',
- 'A' => 'list-style-type:upper-alpha;'
- );
- $li_types = $ul_types + $ol_types;
-
- $this->info['ul']->attr_transform_pre['type'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
- $this->info['ol']->attr_transform_pre['type'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
- $this->info['li']->attr_transform_pre['type'] =
- new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);
-
-
- }
-
- var $defines_child_def = true;
- function getChildDef($def) {
- if ($def->content_model_type != 'strictblockquote') return false;
- return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
- }
-
-}
-
-?>
\ No newline at end of file
+++ /dev/null
-<?php
-
-require_once 'HTMLPurifier/AttrTransform/Lang.php';
-
-/**
- * Proprietary module that transforms XHTML 1.0 deprecated aspects into
- * XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
- * HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
- * (otherwise, elements that were deprecated from Transitional to Strict
- * will not be transformed).
- *
- * XHTML 1.1 compliant document are automatically XHTML 1.0 compliant too,
- * although they may not be as friendly to legacy browsers.
- */
-
-class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
-{
-
- var $name = 'TransformToXHTML11';
- var $attr_collections = array(
- 'Lang' => array(
- 'lang' => false // remove it
- )
- );
-
- var $info_attr_transform_post = array(
- 'lang' => false // remove it
- );
-
- function HTMLPurifier_HTMLModule_TransformToXHTML11() {
- $this->info_attr_transform_pre['lang'] = new HTMLPurifier_AttrTransform_Lang();
- }
-
-}
-
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * Module named 'XMLCommonAttributes' that defines no elements; it only
+ * contributes the xml:lang attribute, typed as 'LanguageCode', to the
+ * 'Lang' attribute collection.
+ */
+class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
+{
+ var $name = 'XMLCommonAttributes';
+
+ // attribute collection name => (attribute name => attribute type)
+ var $attr_collections = array(
+ 'Lang' => array(
+ 'xml:lang' => 'LanguageCode',
+ )
+ );
+}
+
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/ElementDef.php';
+require_once 'HTMLPurifier/Doctype.php';
+require_once 'HTMLPurifier/DoctypeRegistry.php';
require_once 'HTMLPurifier/ContentSets.php';
require_once 'HTMLPurifier/AttrTypes.php';
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
require_once 'HTMLPurifier/HTMLModule/Target.php';
+require_once 'HTMLPurifier/HTMLModule/Scripting.php';
+require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
+require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
+require_once 'HTMLPurifier/HTMLModule/Ruby.php';
-// proprietary modules
-require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
-require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
+// tidy modules
+require_once 'HTMLPurifier/HTMLModule/Tidy.php';
+require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
+require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
+require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
+require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Doctype', null, 'string/null',
- 'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '.
- 'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
+ 'HTML', 'Doctype', '', 'string',
+ 'Doctype to use during filtering. '.
'Technically speaking this is not actually a doctype (as it does '.
'not identify a corresponding DTD), but we are using this name '.
- 'for sake of simplicity. This will override any older directives '.
- 'like %Core.XHTML or %HTML.Strict.'
+ 'for sake of simplicity. When non-blank, this will override any older directives '.
+ 'like %HTML.XHTML or %HTML.Strict.'
);
+HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array(
+ '', 'HTML 4.01 Transitional', 'HTML 4.01 Strict',
+ 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict',
+ 'XHTML 1.1'
+));
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'CustomDoctype', null, 'string/null',
+'
+A custom doctype for power-users who defined their own document
+type. This directive only applies when %HTML.Doctype is blank.
+This directive has been available since 2.0.1.
+'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Trusted', false, 'bool',
+ 'Indicates whether or not the user input is trusted or not. If the '.
+ 'input is trusted, a more expansive set of allowed tags and attributes '.
+ 'will be used. This directive has been available since 2.0.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedModules', null, 'lookup/null', '
+<p>
+ A doctype comes with a set of usual modules to use. Without having
+ to muck about with the doctypes, you can quickly activate or
+ disable these modules by specifying which modules you wish to allow
+ with this directive. This is most useful for unit testing specific
+ modules, although end users may find it useful for their own ends.
+</p>
+<p>
+ If you specify a module that does not exist, the manager will silently
+ fail to use it, so be careful! User-defined modules are not affected
+ by this directive. Modules defined in %HTML.CoreModules are not
+ affected by this directive. This directive has been available since 2.0.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'CoreModules', array(
+ 'Structure' => true,
+ 'Text' => true,
+ 'Hypertext' => true,
+ 'List' => true,
+ 'NonXMLCommonAttributes' => true,
+ 'XMLCommonAttributes' => true,
+ 'CommonAttributes' => true
+ ), 'lookup', '
+<p>
+ Certain modularized doctypes (XHTML, namely), have certain modules
+ that must be included for the doctype to be a conforming document
+ type: put those modules here. By default, XHTML\'s core modules
+ are used. You can set this to a blank array to disable core module
+ protection, but this is not recommended. This directive has been
+ available since 2.0.0.
+</p>
+');
class HTMLPurifier_HTMLModuleManager
{
/**
- * Array of HTMLPurifier_Module instances, indexed by module's class name.
- * All known modules, regardless of use, are in this array.
+ * Instance of HTMLPurifier_DoctypeRegistry
+ * @public
*/
- var $modules = array();
+ var $doctypes;
/**
- * String doctype we will validate against. See $validModules for use.
- *
- * @note
- * There is a special doctype '*' that acts both as the "default"
- * doctype if a customized system only defines one doctype and
- * also a catch-all doctype that gets merged into all the other
- * module collections. When possible, use a private collection to
- * share modules between doctypes: this special doctype is to
- * make life more convenient for users.
+ * Instance of current doctype
+ * @public
*/
var $doctype;
- var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
/**
- * Associative array: $collections[$type][$doctype] = list of modules.
- * This is used to logically separate types of functionality so that
- * based on the doctype and other configuration settings they may
- * be easily switched and on and off. Custom setups may not need
- * to use this abstraction, opting to have only one big collection
- * with one valid doctype.
+ * Instance of HTMLPurifier_AttrTypes
+ * @public
*/
- var $collections = array();
+ var $attrTypes;
/**
- * Modules that may be used in a valid doctype of this kind.
- * Correctional and leniency modules should not be placed in this
- * array unless the user said so: don't stuff every possible lenient
- * module for this doctype in here.
+ * Active instances of modules for the specified doctype are
+ * indexed, by name, in this array.
*/
- var $validModules = array();
- var $validCollections = array(); /**< Collections to merge into $validModules */
+ var $modules = array();
/**
- * Modules that we will allow in input, subset of $validModules. Single
- * element definitions may result in us consulting validModules.
+ * Array of recognized HTMLPurifier_Module instances, indexed by
+ * module's class name. This array is usually lazy loaded, but a
+ * user can overload a module by pre-emptively registering it.
*/
- var $activeModules = array();
- var $activeCollections = array(); /**< Collections to merge into $activeModules */
-
- var $counter = 0; /**< Designates next available integer order for modules. */
- var $initialized = false; /**< Says whether initialize() was called */
+ var $registeredModules = array();
/**
- * Specifies what doctype to siphon new modules from addModule() to,
- * or false to disable the functionality. Must be used in conjunction
- * with $autoCollection.
+ * List of extra modules that were added by the user using addModule().
+ * These get unconditionally merged into the current doctype, whatever
+ * it may be.
*/
- var $autoDoctype = false;
+ var $userModules = array();
+
/**
- * Specifies what collection to siphon new modules from addModule() to,
- * or false to disable the functionality. Must be used in conjunction
- * with $autoCollection.
+ * Associative array of element name to list of modules that have
+ * definitions for the element; this array is dynamically filled.
*/
- var $autoCollection = false;
-
- /** Associative array of element name to defining modules (always array) */
var $elementLookup = array();
- /** List of prefixes we should use for resolving small names */
+ /** List of prefixes we should use for registering small names */
var $prefixes = array('HTMLPurifier_HTMLModule_');
- var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
- var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
+ var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
- /**
- * @param $blank If true, don't do any initializing
- */
- function HTMLPurifier_HTMLModuleManager($blank = false) {
+ /** If set to true, unsafe elements and attributes will be allowed */
+ var $trusted = false;
+
+ function HTMLPurifier_HTMLModuleManager() {
- // the only editable internal object. The rest need to
- // be manipulated through modules
+ // editable internal objects
$this->attrTypes = new HTMLPurifier_AttrTypes();
+ $this->doctypes = new HTMLPurifier_DoctypeRegistry();
- if (!$blank) $this->initialize();
+ // setup default HTML doctypes
- }
-
- function initialize() {
- $this->initialized = true;
-
- // load default modules to the recognized modules list (not active)
- $modules = array(
- // define
- 'CommonAttributes',
- 'Text', 'Hypertext', 'List', 'Presentation',
- 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
- 'Target',
- // define-redefine
- 'Legacy',
- // redefine
- 'TransformToStrict', 'TransformToXHTML11'
+ // module reuse
+ $common = array(
+ 'CommonAttributes', 'Text', 'Hypertext', 'List',
+ 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+ 'StyleAttribute', 'Scripting'
);
- foreach ($modules as $module) {
- $this->addModule($module);
- }
-
- // Safe modules for supported doctypes. These are included
- // in the valid and active module lists by default
- $this->collections['Safe'] = array(
- '_Common' => array( // leading _ indicates private
- 'CommonAttributes', 'Text', 'Hypertext', 'List',
- 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
- 'StyleAttribute'
- ),
- // HTML definitions, defer to XHTML definitions
- 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
- 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
- // XHTML definitions
- 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy', 'Target' ),
- 'XHTML 1.0 Strict' => array(array('_Common')),
- 'XHTML 1.1' => array(array('_Common')),
+ $transitional = array('Legacy', 'Target');
+ $xml = array('XMLCommonAttributes');
+ $non_xml = array('NonXMLCommonAttributes');
+
+ $this->doctypes->register(
+ 'HTML 4.01 Transitional', false,
+ array_merge($common, $transitional, $non_xml),
+ array('Tidy_Transitional', 'Tidy_Proprietary'),
+ array(),
+ '-//W3C//DTD HTML 4.01 Transitional//EN',
+ 'http://www.w3.org/TR/html4/loose.dtd'
);
- // Modules that specify elements that are unsafe from untrusted
- // third-parties. These should be registered in $validModules but
- // almost never $activeModules unless you really know what you're
- // doing.
- $this->collections['Unsafe'] = array();
-
- // Modules to import if lenient mode (attempt to convert everything
- // to a valid representation) is on. These must not be in $validModules
- // unless specified so.
- $this->collections['Lenient'] = array(
- 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
- 'XHTML 1.0 Strict' => array('TransformToStrict'),
- 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
+ $this->doctypes->register(
+ 'HTML 4.01 Strict', false,
+ array_merge($common, $non_xml),
+ array('Tidy_Strict', 'Tidy_Proprietary'),
+ array(),
+ '-//W3C//DTD HTML 4.01//EN',
+ 'http://www.w3.org/TR/html4/strict.dtd'
);
- // Modules to import if correctional mode (correct everything that
- // is feasible to strict mode) is on. These must not be in $validModules
- // unless specified so.
- $this->collections['Correctional'] = array(
- 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
- 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
+ $this->doctypes->register(
+ 'XHTML 1.0 Transitional', true,
+ array_merge($common, $transitional, $xml, $non_xml),
+ array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'),
+ array(),
+ '-//W3C//DTD XHTML 1.0 Transitional//EN',
+ 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
);
- // User-space modules, custom code or whatever
- $this->collections['Extension'] = array();
-
- // setup active versus valid modules. ORDER IS IMPORTANT!
- // definition modules
- $this->makeCollectionActive('Safe');
- $this->makeCollectionValid('Unsafe');
- // redefinition modules
- $this->makeCollectionActive('Lenient');
- $this->makeCollectionActive('Correctional');
+ $this->doctypes->register(
+ 'XHTML 1.0 Strict', true,
+ array_merge($common, $xml, $non_xml),
+ array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
+ array(),
+ '-//W3C//DTD XHTML 1.0 Strict//EN',
+ 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
+ );
- $this->autoDoctype = '*';
- $this->autoCollection = 'Extension';
+ $this->doctypes->register(
+ 'XHTML 1.1', true,
+ array_merge($common, $xml, array('Ruby')),
+ array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
+ array(),
+ '-//W3C//DTD XHTML 1.1//EN',
+ 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
+ );
}
/**
- * Adds a module to the recognized module list. This does not
- * do anything else: the module must be added to a corresponding
- * collection to be "activated".
+ * Registers a module to the recognized module list, useful for
+ * overloading pre-existing modules.
* @param $module Mixed: string module name, with or without
* HTMLPurifier_HTMLModule prefix, or instance of
* subclass of HTMLPurifier_HTMLModule.
* - Check for literal object name
* - Throw fatal error
* If your object name collides with an internal class, specify
- * your module manually.
+ * your module manually. All modules must have been included
+ * externally: registerModule will not perform inclusions for you!
+ * @warning If your module has the same name as an already loaded
+ * module, your module will overload the old one WITHOUT
+ * warning.
*/
- function addModule($module) {
+ function registerModule($module) {
if (is_string($module)) {
+ // attempt to load the module
$original_module = $module;
$ok = false;
foreach ($this->prefixes as $prefix) {
}
$module = new $module();
}
- $module->order = $this->counter++; // assign then increment
- $this->modules[$module->name] = $module;
- if ($this->autoDoctype !== false && $this->autoCollection !== false) {
- $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
+ if (empty($module->name)) {
+ trigger_error('Module instance of ' . get_class($module) . ' must have name');
+ return;
}
+ $this->registeredModules[$module->name] = $module;
}
/**
* Safely tests for class existence without invoking __autoload in PHP5
+ * or greater.
* @param $name String class name to test
+ * @note If any other class needs it, we'll need to stash in a
+ * conjectured "compatibility" class
* @private
*/
function _classExists($name) {
}
/**
- * Makes a collection active, while also making it valid if not
- * already done so. See $activeModules for the semantics of "active".
- * @param $collection_name Name of collection to activate
- */
- function makeCollectionActive($collection_name) {
- if (!in_array($collection_name, $this->validCollections)) {
- $this->makeCollectionValid($collection_name);
- }
- $this->activeCollections[] = $collection_name;
- }
-
- /**
- * Makes a collection valid. See $validModules for the semantics of "valid"
+ * Adds a module to the current doctype by first registering it,
+ * and then tacking it on to the active doctype
*/
- function makeCollectionValid($collection_name) {
- $this->validCollections[] = $collection_name;
+ function addModule($module) {
+ $this->registerModule($module);
+ if (is_object($module)) $module = $module->name;
+ $this->userModules[] = $module;
}
/**
- * Adds a class prefix that addModule() will use to resolve a
+ * Adds a class prefix that registerModule() will use to resolve a
* string name to a concrete class
*/
function addPrefix($prefix) {
- $this->prefixes[] = (string) $prefix;
+ $this->prefixes[] = $prefix;
}
+ /**
+ * Performs processing on modules; after this has been called you may
+ * use getElement() and getElements()
+ * @param $config Instance of HTMLPurifier_Config
+ */
function setup($config) {
- // load up the autocollection
- if ($this->autoCollection !== false) {
- $this->makeCollectionActive($this->autoCollection);
- }
+ $this->trusted = $config->get('HTML', 'Trusted');
+
+ // generate
+ $this->doctype = $this->doctypes->make($config);
+ $modules = $this->doctype->modules;
- // retrieve the doctype
- $this->doctype = $this->getDoctype($config);
- if (isset($this->doctypeAliases[$this->doctype])) {
- $this->doctype = $this->doctypeAliases[$this->doctype];
+ // take out the default modules that aren't allowed
+ $lookup = $config->get('HTML', 'AllowedModules');
+ $special_cases = $config->get('HTML', 'CoreModules');
+
+ if (is_array($lookup)) {
+ foreach ($modules as $k => $m) {
+ if (isset($special_cases[$m])) continue;
+ if (!isset($lookup[$m])) unset($modules[$k]);
+ }
}
- // process module collections to module name => module instance form
- foreach ($this->collections as $col_i => $x) {
- $this->processCollections($this->collections[$col_i]);
+ // merge in custom modules
+ $modules = array_merge($modules, $this->userModules);
+
+ foreach ($modules as $module) {
+ $this->processModule($module);
}
- $this->validModules = $this->assembleModules($this->validCollections);
- $this->activeModules = $this->assembleModules($this->activeCollections);
+ foreach ($this->doctype->tidyModules as $module) {
+ $this->processModule($module);
+ if (method_exists($this->modules[$module], 'construct')) {
+ $this->modules[$module]->construct($config);
+ }
+ }
// setup lookup table based on all valid modules
- foreach ($this->validModules as $module) {
+ foreach ($this->modules as $module) {
foreach ($module->info as $name => $def) {
if (!isset($this->elementLookup[$name])) {
$this->elementLookup[$name] = array();
// note the different choice
$this->contentSets = new HTMLPurifier_ContentSets(
- // content models that contain non-allowed elements are
- // harmless because RemoveForeignElements will ensure
- // they never get in anyway, and there is usually no
- // reason why you should want to restrict a content
- // model beyond what is mandated by the doctype.
- // Note, however, that this means redefinitions of
- // content models can't be tossed in validModels willy-nilly:
- // that stuff still is regulated by configuration.
- $this->validModules
+ // content set assembly deals with all possible modules,
+ // not just ones deemed to be "safe"
+ $this->modules
);
$this->attrCollections = new HTMLPurifier_AttrCollections(
$this->attrTypes,
- // only explicitly allowed modules are allowed to affect
- // the global attribute collections. This mean's there's
- // a distinction between loading the Bdo module, and the
- // bdo element: Bdo will enable the dir attribute on all
- // elements, while bdo will only define the bdo element,
- // which will not have an editable directionality. This might
- // catch people who are loading only elements by surprise, so
- // we should consider loading an entire module if all the
- // elements it defines are requested by the user, especially
- // if it affects the global attribute collections.
- $this->activeModules
+ // there is no way to directly disable a global attribute,
+ // but using AllowedAttributes or simply not including
+ // the module in your custom doctype should be sufficient
+ $this->modules
);
-
}
/**
- * Takes a list of collections and merges together all the defined
- * modules for the current doctype from those collections.
- * @param $collections List of collection suffixes we should grab
- * modules from (like 'Safe' or 'Lenient')
+ * Takes a module and adds it to the active module collection,
+ * registering it if necessary.
*/
- function assembleModules($collections) {
- $modules = array();
- $numOfCollectionsUsed = 0;
- foreach ($collections as $name) {
- $disable_global = false;
- if (!isset($this->collections[$name])) {
- trigger_error("$name collection is undefined", E_USER_ERROR);
- continue;
- }
- $cols = $this->collections[$name];
- if (isset($cols[$this->doctype])) {
- if (isset($cols[$this->doctype]['*'])) {
- unset($cols[$this->doctype]['*']);
- $disable_global = true;
- }
- $modules += $cols[$this->doctype];
- $numOfCollectionsUsed++;
- }
- // accept catch-all doctype
- if (
- $this->doctype !== '*' &&
- isset($cols['*']) &&
- !$disable_global
- ) {
- $modules += $cols['*'];
- }
+ function processModule($module) {
+ if (!isset($this->registeredModules[$module]) || is_object($module)) {
+ $this->registerModule($module);
}
-
- if ($numOfCollectionsUsed < 1) {
- // possible XSS injection if user-specified doctypes
- // are allowed
- trigger_error("Doctype {$this->doctype} does not exist, ".
- "check for typos (if you desire a doctype that allows ".
- "no elements, use an empty array collection)", E_USER_ERROR);
- }
- return $modules;
+ $this->modules[$module] = $this->registeredModules[$module];
}
/**
- * Takes a collection and performs inclusions and substitutions for it.
- * @param $cols Reference to collections class member variable
+ * Retrieves merged element definitions.
+ * @return Array of HTMLPurifier_ElementDef
*/
- function processCollections(&$cols) {
-
- // $cols is the set of collections
- // $col_i is the name (index) of a collection
- // $col is a collection/list of modules
-
- // perform inclusions
- foreach ($cols as $col_i => $col) {
- $seen = array();
- if (!empty($col[0]) && is_array($col[0])) {
- $seen[$col_i] = true; // recursion reporting
- $includes = $col[0];
- unset($cols[$col_i][0]); // remove inclusions value, recursion guard
- } else {
- $includes = array();
- }
- if (empty($includes)) continue;
- for ($i = 0; isset($includes[$i]); $i++) {
- $inc = $includes[$i];
- if (isset($seen[$inc])) {
- trigger_error(
- "Circular inclusion detected in $col_i collection",
- E_USER_ERROR
- );
- continue;
- } else {
- $seen[$inc] = true;
- }
- if (!isset($cols[$inc])) {
- trigger_error(
- "Collection $col_i tried to include undefined ".
- "collection $inc", E_USER_ERROR);
- continue;
- }
- foreach ($cols[$inc] as $module) {
- if (is_array($module)) { // another inclusion!
- foreach ($module as $inc2) $includes[] = $inc2;
- continue;
- }
- $cols[$col_i][] = $module; // merge in the other modules
- }
- }
- }
-
- // replace with real modules, invert module from list to
- // assoc array of module name to module instance
- foreach ($cols as $col_i => $col) {
- $ignore_global = false;
- $order = array();
- foreach ($col as $module_i => $module) {
- unset($cols[$col_i][$module_i]);
- if (is_array($module)) {
- trigger_error("Illegal inclusion array at index".
- " $module_i found collection $col_i, inclusion".
- " arrays must be at start of collection (index 0)",
- E_USER_ERROR);
- continue;
- }
- if ($module_i === '*' && $module === false) {
- $ignore_global = true;
- continue;
- }
- if (!isset($this->modules[$module])) {
- trigger_error(
- "Collection $col_i references undefined ".
- "module $module",
- E_USER_ERROR
- );
- continue;
- }
- $module = $this->modules[$module];
- $cols[$col_i][$module->name] = $module;
- $order[$module->name] = $module->order;
- }
- array_multisort(
- $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
- );
- if ($ignore_global) $cols[$col_i]['*'] = false;
- }
-
- // delete pseudo-collections
- foreach ($cols as $col_i => $col) {
- if ($col_i[0] == '_') unset($cols[$col_i]);
- }
-
- }
-
- /**
- * Retrieves the doctype from the configuration object
- */
- function getDoctype($config) {
- $doctype = $config->get('HTML', 'Doctype');
- if ($doctype !== null) {
- return $doctype;
- }
- if (!$this->initialized) {
- // don't do HTML-oriented backwards compatibility stuff
- // use either the auto-doctype, or the catch-all doctype
- return $this->autoDoctype ? $this->autoDoctype : '*';
- }
- // this is backwards-compatibility stuff
- if ($config->get('Core', 'XHTML')) {
- $doctype = 'XHTML 1.0';
- } else {
- $doctype = 'HTML 4.01';
- }
- if ($config->get('HTML', 'Strict')) {
- $doctype .= ' Strict';
- } else {
- $doctype .= ' Transitional';
- }
- return $doctype;
- }
-
- /**
- * Retrieves merged element definitions for all active elements.
- * @note We may want to generate an elements array during setup
- * and pass that on, because a specific combination of
- * elements may trigger the loading of a module.
- * @param $config Instance of HTMLPurifier_Config, for determining
- * stray elements.
- */
- function getElements($config) {
+ function getElements() {
$elements = array();
- foreach ($this->activeModules as $module) {
+ foreach ($this->modules as $module) {
foreach ($module->info as $name => $v) {
if (isset($elements[$name])) continue;
- $elements[$name] = $this->getElement($name, $config);
+ // if element is not safe, don't use it
+ if (!$this->trusted && ($v->safe === false)) continue;
+ $elements[$name] = $this->getElement($name);
}
}
- // standalone elements now loaded
+ // remove dud elements, this happens when an element that
+ // appeared to be safe actually wasn't
+ foreach ($elements as $n => $v) {
+ if ($v === false) unset($elements[$n]);
+ }
return $elements;
/**
* Retrieves a single merged element definition
* @param $name Name of element
- * @param $config Instance of HTMLPurifier_Config, may not be necessary.
+ * @param $trusted Boolean trusted overriding parameter: set to true
+ * if you want the full version of an element
+ * @return Merged HTMLPurifier_ElementDef
*/
- function getElement($name, $config) {
+ function getElement($name, $trusted = null) {
$def = false;
+ if ($trusted === null) $trusted = $this->trusted;
- $modules = $this->validModules;
+ $modules = $this->modules;
if (!isset($this->elementLookup[$name])) {
return false;
foreach($this->elementLookup[$name] as $module_name) {
$module = $modules[$module_name];
- $new_def = $module->info[$name];
+
+ // copy is used because, ideally speaking, the original
+ // definition should not be modified. Usually, this will
+ // make no difference, but we copy for consistency's sake
+ $new_def = $module->info[$name]->copy();
+
+ // refuse to create/merge in a definition that is deemed unsafe
+ if (!$trusted && ($new_def->safe === false)) {
+ $def = false;
+ continue;
+ }
if (!$def && $new_def->standalone) {
+ // element with unknown safety is not to be trusted.
+ // however, a merge-in definition with undefined safety
+ // is fine
+ if (!$trusted && !$new_def->safe) continue;
$def = $new_def;
} elseif ($def) {
$def->mergeIn($new_def);
$this->contentSets->generateChildDef($def, $module);
}
+
+ // add information on required attributes
+ foreach ($def->attr as $attr_name => $attr_def) {
+ if ($attr_def->required) {
+ $def->required_attr[] = $attr_name;
+ }
+ }
return $def;
}
-?>
+
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * Injects tokens into the document while parsing for well-formedness.
+ * This enables "formatter-like" functionality such as auto-paragraphing,
+ * smiley-ification and linkification to take place.
+ *
+ * This base class supplies the shared set-up (prepare()) and the
+ * allowsElement() helper; its handleText() and handleElement() handlers
+ * are empty and are meant to be overridden by concrete injectors.
+ */
+class HTMLPurifier_Injector
+{
+
+ /**
+ * Advisory name of injector; this is used for friendly error messages
+ */
+ var $name;
+
+ /**
+ * Amount of tokens the injector needs to skip + 1. Because
+ * the decrement is the first thing that happens, this needs to
+ * be one greater than the "real" skip count.
+ */
+ var $skip = 1;
+
+ /**
+ * Instance of HTMLPurifier_HTMLDefinition, assigned by prepare()
+ */
+ var $htmlDefinition;
+
+ /**
+ * Reference to CurrentNesting variable in Context. This is an array
+ * list of tokens that we are currently "inside". Bound by prepare().
+ */
+ var $currentNesting;
+
+ /**
+ * Reference to InputTokens variable in Context. This is an array
+ * list of the input tokens that are being processed. Bound by prepare().
+ */
+ var $inputTokens;
+
+ /**
+ * Reference to InputIndex variable in Context. This is an integer
+ * array index for $this->inputTokens that indicates what token
+ * is currently being processed. Bound by prepare().
+ */
+ var $inputIndex;
+
+ /**
+ * Array of elements and attributes this injector creates and that
+ * therefore need to be allowed by the definition. Takes form of
+ * array('element' => array('attr', 'attr2'), 'element2')
+ * A string key names an element whose value lists the attributes it
+ * needs; an entry with an integer key is a bare element name with no
+ * attribute requirements.
+ */
+ var $needed = array();
+
+ /**
+ * Prepares the injector by giving it the config and context objects:
+ * this allows references to important variables to be made within
+ * the injector. This function also checks if the HTML environment
+ * will work with the Injector: if p tags are not allowed, the
+ * Auto-Paragraphing injector should not be enabled.
+ *
+ * The $needed checks run first; on the first missing item the method
+ * returns immediately, before any of the context references is bound.
+ * @param $config Instance of HTMLPurifier_Config
+ * @param $context Instance of HTMLPurifier_Context
+ * @return Boolean false if success, string of missing needed element/attribute if failure
+ * (a missing attribute is reported in the form "element.attribute")
+ */
+ function prepare($config, &$context) {
+ $this->htmlDefinition = $config->getHTMLDefinition();
+ // perform $needed checks: every listed element and attribute must exist in the definition
+ foreach ($this->needed as $element => $attributes) {
+ if (is_int($element)) $element = $attributes; // bare entry: the value itself is the element name
+ if (!isset($this->htmlDefinition->info[$element])) return $element;
+ if (!is_array($attributes)) continue; // no attribute list to verify for this element
+ foreach ($attributes as $name) {
+ if (!isset($this->htmlDefinition->info[$element]->attr[$name])) return "$element.$name";
+ }
+ }
+ $this->currentNesting =& $context->get('CurrentNesting');
+ $this->inputTokens =& $context->get('InputTokens');
+ $this->inputIndex =& $context->get('InputIndex');
+ return false; // false signals success
+ }
+
+ /**
+ * Tests if the context node allows a certain element. The context
+ * node is the last token in $this->currentNesting or, when the
+ * nesting is empty, the definition's info_parent_def. The element is
+ * allowed when it is listed in the context node's child->elements
+ * and is not listed in its excludes.
+ * @param $name Name of element to test for
+ * @return True if element is allowed, false if it is not
+ */
+ function allowsElement($name) {
+ if (!empty($this->currentNesting)) {
+ $parent_token = array_pop($this->currentNesting); // pop, then push straight back: a peek at the innermost open token
+ $this->currentNesting[] = $parent_token;
+ $parent = $this->htmlDefinition->info[$parent_token->name];
+ } else {
+ $parent = $this->htmlDefinition->info_parent_def; // nothing is open, so fall back to the parent definition
+ }
+ if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Handler that is called when a text token is processed. The token
+ * is received by reference; this base implementation does nothing.
+ */
+ function handleText(&$token) {}
+
+ /**
+ * Handler that is called when a start or empty token is processed.
+ * The token is received by reference; this base implementation does
+ * nothing.
+ */
+ function handleElement(&$token) {}
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Injector.php';
+
+HTMLPurifier_ConfigSchema::define(
+ 'AutoFormat', 'AutoParagraph', false, 'bool', '
+<p>
+ This directive turns on auto-paragraphing, where double newlines are
+ converted in to paragraphs whenever possible. Auto-paragraphing
+ applies when:
+</p>
+<ul>
+ <li>There are inline elements or text in the root node</li>
+ <li>There are inline elements or text with double newlines or
+ block elements in nodes that allow paragraph tags</li>
+ <li>There are double newlines in paragraph tags</li>
+</ul>
+<p>
+ <code>p</code> tags must be allowed for this directive to take effect.
+ We do not use <code>br</code> tags for paragraphing, as that is
+ semantically incorrect.
+</p>
+<p>
+ This directive has been available since 2.0.1.
+</p>
+');
+
+/**
+ * Injector that auto-paragraphs text based on double newlines, both in
+ * the root node and inside elements that allow paragraph tags.
+ */
+class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
+{
+
+ var $name = 'AutoParagraph';
+ var $needed = array('p');
+
+ function _pStart() { // builds the <p> start token used for every paragraph this injector opens
+ $par = new HTMLPurifier_Token_Start('p');
+ $par->armor['MakeWellFormed_TagClosedError'] = true; // NOTE(review): key name suggests MakeWellFormed should not report a tag-closed error for this generated tag - confirm
+ return $par;
+ }
+
+ function handleText(&$token) { // decides whether this text needs paragraph tokens; if so, $token is replaced by an array of tokens
+ $text = $token->data;
+ if (empty($this->currentNesting)) {
+ if (!$this->allowsElement('p')) return;
+ // case 1: we're in root node (and it allows paragraphs): open a <p>, then split the text
+ $token = array($this->_pStart());
+ $this->_splitText($text, $token);
+ } elseif ($this->currentNesting[count($this->currentNesting)-1]->name == 'p') {
+ // case 2: we're in a paragraph; the empty array tells _splitText no <p> was opened here
+ $token = array();
+ $this->_splitText($text, $token);
+ } elseif ($this->allowsElement('p')) {
+ // case 3: we're in an element that allows paragraphs
+ if (strpos($text, "\n\n") !== false) {
+ // case 3.1: this text node has a double-newline
+ $token = array($this->_pStart());
+ $this->_splitText($text, $token);
+ } else {
+ $ok = false;
+ // test if up-coming tokens are either block or have
+ // a double newline in them (look-ahead only, nothing is consumed)
+ for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
+ if ($this->inputTokens[$i]->type == 'start'){
+ if (!$this->_isInline($this->inputTokens[$i])) {
+ $ok = true;
+ }
+ break; // the first start tag settles the question either way
+ }
+ if ($this->inputTokens[$i]->type == 'end') break; // the enclosing element closes: stop looking
+ if ($this->inputTokens[$i]->type == 'text') {
+ if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
+ $ok = true;
+ }
+ if (!$this->inputTokens[$i]->is_whitespace) break; // whitespace-only text keeps the scan going
+ }
+ }
+ if ($ok) {
+ // case 3.2: this text node is next to another node
+ // that will start a paragraph, so wrap it: <p> + original token
+ $token = array($this->_pStart(), $token);
+ }
+ }
+ }
+
+ }
+
+ function handleElement(&$token) { // may replace $token with array(<p> start token, original token)
+ // check if we're inside a tag already
+ if (!empty($this->currentNesting)) {
+ if ($this->allowsElement('p')) {
+ // special case: we're in an element that allows paragraphs
+
+ // this token is already a paragraph, abort
+ if ($token->name == 'p') return;
+
+ // this token is block-level, abort
+ if (!$this->_isInline($token)) return;
+
+ // check if this token is adjacent to the parent token
+ $prev = $this->inputTokens[$this->inputIndex - 1];
+ if ($prev->type != 'start') {
+ // not adjacent, so no look-ahead is needed; just
+ // add lead paragraph tag if our token is inline
+ // and the previous tag was an end paragraph
+ if (
+ $prev->name == 'p' && $prev->type == 'end' &&
+ $this->_isInline($token) // already established above: block-level tokens returned earlier
+ ) {
+ $token = array($this->_pStart(), $token);
+ }
+ return;
+ }
+
+ // this token is the first child of the element that allows
+ // paragraph. We have to peek ahead and see whether or not
+ // there is anything inside that suggests that a paragraph
+ // will be needed
+ $ok = false;
+ // maintain a mini-nesting counter, this lets us bail out
+ // early (once the parent element has been closed)
+ $j = 1; // current nesting, one is due to parent (we recalculate current token)
+ for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
+ if ($this->inputTokens[$i]->type == 'start') $j++;
+ if ($this->inputTokens[$i]->type == 'end') $j--;
+ if ($this->inputTokens[$i]->type == 'text') {
+ if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
+ $ok = true; // a double newline inside the parent means paragraphs will be formed
+ break;
+ }
+ }
+ if ($j <= 0) break; // counter back at zero: we have left the parent element
+ }
+ if ($ok) {
+ $token = array($this->_pStart(), $token);
+ }
+ }
+ return;
+ }
+
+ // root node: check if the start tag counts as a "block" element
+ if (!$this->_isInline($token)) return;
+
+ // prepend a paragraph start tag before the token
+ $token = array($this->_pStart(), $token);
+ }
+
+ /**
+ * Splits text on double newlines into paragraph tokens and appends
+ * them to the result stream that will replace the original token.
+ * Text that contains no double newline is appended unchanged as a
+ * single text token.
+ * @param $data String text data that will be processed
+ * into paragraphs
+ * @param $result Reference to array of tokens that the
+ * tags will be appended onto; an empty array on entry
+ * is taken to mean that we are already inside a paragraph
+ * @note Peeks at $this->inputTokens after $this->inputIndex to decide
+ * whether the final end paragraph tag should be kept
+ * @private
+ */
+ function _splitText($data, &$result) {
+ $raw_paragraphs = explode("\n\n", $data);
+
+ // non-empty paragraphs are collected here by the loop further down
+ $paragraphs = array();
+ $needs_start = false;
+ $needs_end = false;
+
+ $c = count($raw_paragraphs);
+ if ($c == 1) {
+ // there were no double-newlines, abort quickly
+ $result[] = new HTMLPurifier_Token_Text($data);
+ return;
+ }
+
+ for ($i = 0; $i < $c; $i++) {
+ $par = $raw_paragraphs[$i];
+ if (trim($par) !== '') {
+ $paragraphs[] = $par;
+ continue;
+ }
+ if ($i == 0 && empty($result)) {
+ // The empty result indicates that the AutoParagraph
+ // injector did not add any start paragraph tokens.
+ // The fact that the first paragraph is empty indicates
+ // that there was a double-newline at the start of the
+ // data.
+ // Combined together, this means that we are in a paragraph,
+ // and the newline means we should start a new one.
+ $result[] = new HTMLPurifier_Token_End('p');
+ // However, the start token should only be added if
+ // there is more processing to be done (i.e. there are
+ // real paragraphs in here). If there are none, the
+ // next start paragraph tag will be handled by the
+ // next run-around the injector
+ $needs_start = true;
+ } elseif ($i + 1 == $c) {
+ // a double-newline at the end indicates that
+ // there is an overriding need to start a new paragraph
+ // for the next section. This has no effect until
+ // we've processed all of the other paragraphs though
+ $needs_end = true;
+ }
+ }
+
+ // check if there are no "real" paragraphs to be processed
+ if (empty($paragraphs)) {
+ return;
+ }
+
+ // add a start tag if an end tag was added while processing
+ // the raw paragraphs (that happens if there's a leading double
+ // newline)
+ if ($needs_start) $result[] = $this->_pStart();
+
+ // append the paragraphs onto the result, each as text, </p>, <p>
+ foreach ($paragraphs as $par) {
+ $result[] = new HTMLPurifier_Token_Text($par);
+ $result[] = new HTMLPurifier_Token_End('p');
+ $result[] = $this->_pStart();
+ }
+
+ // remove trailing start token, if one is needed, it will
+ // be handled the next time this injector is called
+ array_pop($result);
+
+ // check the outside to determine whether or not the
+ // end paragraph tag should be removed. It should be removed
+ // unless the next non-whitespace token is a paragraph
+ // or a block element.
+ $remove_paragraph_end = true;
+
+ if (!$needs_end) {
+ // Start the checks one after the current token's index
+ for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
+ if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') {
+ $remove_paragraph_end = $this->_isInline($this->inputTokens[$i]); // no break here: the scan goes on, so a later start/empty token overwrites this
+ }
+ // check if we can abort early (whitespace means we carry-on!)
+ if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break;
+ // end tags will automatically be handled by MakeWellFormed,
+ // so we don't have to worry about them
+ if ($this->inputTokens[$i]->type == 'end') break;
+ }
+ } else {
+ $remove_paragraph_end = false; // trailing double newline: always keep the end tag
+ }
+
+ // drop the final end paragraph tag when the checks above
+ // decided that it should be removed
+ if ($remove_paragraph_end) {
+ array_pop($result);
+ }
+
+ }
+
+    /**
+     * Tells whether the element named by a token is listed among the
+     * allowed children of the p element (and is, ergo, allowed in
+     * paragraph tags)
+     * @param $token Token whose name is looked up
+     * @return bool true if the element is registered as a child of p
+     * @private
+     */
+    function _isInline($token) {
+        $element = $token->name;
+        return isset($this->htmlDefinition->info['p']->child->elements[$element]);
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Injector.php';
+
+HTMLPurifier_ConfigSchema::define(
+ 'AutoFormat', 'Linkify', false, 'bool', '
+<p>
+ This directive turns on linkification, auto-linking http, ftp and
+ https URLs. <code>a</code> tags with the <code>href</code> attribute
+ must be allowed. This directive has been available since 2.0.1.
+</p>
+');
+
+/**
+ * Injector that converts http, https and ftp text URLs to actual links.
+ */
+class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
+{
+
+    var $name = 'Linkify';
+    var $needed = array('a' => array('href'));
+
+    /**
+     * Replaces a text token that contains URLs with a run of text tokens
+     * and start/text/end token triples for an a element per URL.
+     * @param $token Reference to the text token. It is overwritten with
+     *        an array of replacement tokens when at least one URL is
+     *        found, and left untouched otherwise.
+     */
+    function handleText(&$token) {
+        if (!$this->allowsElement('a')) return;
+
+        if (strpos($token->data, '://') === false) {
+            // our really quick heuristic failed, abort
+            // this may not work so well if we want to match things like
+            // "google.com", but then again, most people don't
+            return;
+        }
+
+        // there may be URL(s). Let's split the string:
+        // Note: this regex is extremely permissive
+        $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+        // preg_split() returns false on failure and a single chunk when
+        // the heuristic matched but no URL did (e.g. "foo://bar"); keep
+        // the original token in both cases instead of rebuilding it
+        if ($bits === false || count($bits) === 1) return;
+
+        $replacement = array();
+
+        // with PREG_SPLIT_DELIM_CAPTURE the chunks alternate: even
+        // indices hold plain text, odd indices hold the captured URLs
+        foreach ($bits as $i => $bit) {
+            if ($i % 2 == 0) {
+                if ($bit === '') continue;
+                $replacement[] = new HTMLPurifier_Token_Text($bit);
+            } else {
+                $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $bit));
+                $replacement[] = new HTMLPurifier_Token_Text($bit);
+                $replacement[] = new HTMLPurifier_Token_End('a');
+            }
+        }
+
+        $token = $replacement;
+
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Injector.php';
+
+HTMLPurifier_ConfigSchema::define(
+ 'AutoFormat', 'PurifierLinkify', false, 'bool', '
+<p>
+ Internal auto-formatter that converts configuration directives in
+ syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags
+ with the <code>href</code> attribute must be allowed.
+ This directive has been available since 2.0.1.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'AutoFormatParam', 'PurifierLinkifyDocURL', '#%s', 'string', '
+<p>
+ Location of configuration documentation to link to, let %s substitute
+ into the configuration\'s namespace and directive names sans the percent
+ sign. This directive has been available since 2.0.1.
+</p>
+');
+
+/**
+ * Injector that converts configuration directive syntax %Namespace.Directive
+ * to links
+ */
+class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
+{
+
+    var $name = 'PurifierLinkify';
+    var $docURL;
+    var $needed = array('a' => array('href'));
+
+    /**
+     * Reads %AutoFormatParam.PurifierLinkifyDocURL into $this->docURL and
+     * then defers to the parent implementation.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Whatever the parent prepare() returns
+     */
+    function prepare($config, &$context) {
+        $this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL');
+        return parent::prepare($config, $context);
+    }
+
+    /**
+     * Replaces a text token that mentions directives (%Namespace.Directive)
+     * with a run of text tokens and start/text/end token triples for an
+     * a element per directive. The href is $this->docURL with its %s
+     * replaced by the directive name sans the percent sign.
+     * @param $token Reference to the text token. It is overwritten with
+     *        an array of replacement tokens when at least one directive
+     *        is found, and left untouched otherwise.
+     */
+    function handleText(&$token) {
+        if (!$this->allowsElement('a')) return;
+        if (strpos($token->data, '%') === false) return;
+
+        $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+        // preg_split() returns false on failure and a single chunk when
+        // a percent sign was present but no directive (e.g. "50% off");
+        // keep the original token in both cases instead of rebuilding it
+        if ($bits === false || count($bits) === 1) return;
+
+        $replacement = array();
+
+        // with PREG_SPLIT_DELIM_CAPTURE the chunks alternate: even
+        // indices hold plain text, odd indices hold the captured
+        // Namespace.Directive names (without the leading percent sign)
+        foreach ($bits as $i => $bit) {
+            if ($i % 2 == 0) {
+                if ($bit === '') continue;
+                $replacement[] = new HTMLPurifier_Token_Text($bit);
+            } else {
+                $replacement[] = new HTMLPurifier_Token_Start('a',
+                    array('href' => str_replace('%s', $bit, $this->docURL)));
+                $replacement[] = new HTMLPurifier_Token_Text('%' . $bit);
+                $replacement[] = new HTMLPurifier_Token_End('a');
+            }
+        }
+
+        $token = $replacement;
+
+    }
+
+}
+
*/
var $messages = array();
+ /**
+ * Array of localizable error codes
+ */
+ var $errorNames = array();
+
/**
* Has the language object been loaded yet?
* @private
*/
var $_loaded = false;
+ /**
+ * Instances of HTMLPurifier_Config and HTMLPurifier_Context
+ */
+ var $config, $context;
+
+    /**
+     * Stores the configuration and context this language object works with.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context, kept by reference
+     */
+    function HTMLPurifier_Language($config, &$context) {
+        $this->context =& $context;
+        $this->config  = $config;
+    }
+
/**
* Loads language object with necessary info from factory cache
* @note This is a lazy loader
}
/**
- * Retrieves a localised message. Does not perform any operations.
+ * Retrieves a localised message.
* @param $key string identifier of message
* @return string localised message
*/
function getMessage($key) {
if (!$this->_loaded) $this->load();
- if (!isset($this->messages[$key])) return '';
+ if (!isset($this->messages[$key])) return "[$key]";
return $this->messages[$key];
}
+    /**
+     * Looks up the localised name of an error severity, loading the
+     * language data first if that has not happened yet.
+     * @param $int integer error number, corresponding to PHP's error
+     *             reporting
+     * @return string localised name, or "[Error: $int]" if none is defined
+     */
+    function getErrorName($int) {
+        if (!$this->_loaded) {
+            $this->load();
+        }
+        return isset($this->errorNames[$int])
+            ? $this->errorNames[$int]
+            : "[Error: $int]";
+    }
+
+    /**
+     * Converts an array list into a string readable representation,
+     * joining items with the 'Item separator' message and the final pair
+     * with the 'Item separator last' message.
+     * @param $array Array of items to join. Keys are ignored, so lists
+     *        with gaps (e.g. after unset()) are handled as well.
+     * @return string Joined items, empty string for an empty array
+     */
+    function listify($array) {
+        $sep      = $this->getMessage('Item separator');
+        $sep_last = $this->getMessage('Item separator last');
+
+        // reindex so that position, not the original key, decides which
+        // separator is used
+        $items = array_values($array);
+        $last  = count($items) - 1;
+
+        $ret = '';
+        foreach ($items as $i => $item) {
+            if ($i > 0) {
+                $ret .= ($i < $last) ? $sep : $sep_last;
+            }
+            $ret .= $item;
+        }
+        return $ret;
+    }
+
+ /**
+ * Formats a localised message with passed parameters
+ * @param $key string identifier of message
+ * @param $args Parameters to substitute in; the argument with key N is referenced in the message as $N
+ * @return string localised message, or "[$key]" if the message is not defined
+ * @todo Implement conditionals? Right now, some messages make
+ * reference to line numbers, but those aren't always available
+ */
+ function formatMessage($key, $args = array()) {
+ if (!$this->_loaded) $this->load();
+ if (!isset($this->messages[$key])) return "[$key]";
+ $raw = $this->messages[$key];
+ $subst = array(); // placeholder => replacement, applied with strtr() at the end
+ $generator = false; // fetched from the context the first time a token is seen
+ foreach ($args as $i => $value) {
+ if (is_object($value)) {
+ if (is_a($value, 'HTMLPurifier_Token')) {
+ // factor this out some time
+ if (!$generator) $generator = $this->context->get('Generator');
+ if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
+ if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
+ $subst['$'.$i.'.Compact'] =
+ $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
+ // a more complex algorithm for compact representation
+ // could be introduced for all types of tokens. This
+ // may need to be factored out into a dedicated class
+ if (!empty($value->attr)) {
+ $stripped_token = $value->copy();
+ $stripped_token->attr = array();
+ $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
+ }
+ $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
+ }
+ continue; // objects that are not tokens contribute no placeholders
+ } elseif (is_array($value)) {
+ $keys = array_keys($value);
+ if (array_keys($keys) === $keys) {
+ // list: the keys are exactly 0..n-1, in order
+ $subst['$'.$i] = $this->listify($value);
+ } else {
+ // associative array
+ // no $i implementation yet, sorry
+ $subst['$'.$i.'.Keys'] = $this->listify($keys);
+ $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
+ }
+ continue;
+ }
+ $subst['$' . $i] = $value; // any other value is substituted as-is
+ }
+ return strtr($raw, $subst);
+ }
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
$fallback = 'en';
$messages = array(
- 'htmlpurifier' => 'HTML Purifier X'
+ 'HTMLPurifier' => 'HTML Purifier X'
);
-?>
\ No newline at end of file
$messages = array(
-'htmlpurifier' => 'HTML Purifier',
-'pizza' => 'Pizza', // for unit testing purposes
+'HTMLPurifier' => 'HTML Purifier',
+// for unit testing purposes
+'LanguageFactoryTest: Pizza' => 'Pizza',
+'LanguageTest: List' => '$1',
+'LanguageTest: Hash' => '$1.Keys; $1.Values',
+
+'Item separator' => ', ',
+'Item separator last' => ' and ', // non-Harvard style
+
+'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.',
+'ErrorCollector: At line' => ' at line $line',
+
+// the entity in 'Unescaped lt' is literal text: it shows the user what to write
+'Lexer: Unclosed comment' => 'Unclosed comment',
+'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be &lt;',
+'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
+'Lexer: Missing attribute key' => 'Attribute declaration has no key',
+'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
+
+'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
+'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',
+'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text',
+'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed',
+'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed',
+'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed',
+'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element were removed to end',
+
+'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
+'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
+'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact',
+'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed',
+'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text',
+'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized',
+'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document',
+
+'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed',
+'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element',
+'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model',
+'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed',
+
+'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys',
+'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed',
+
+);
+
+$errorNames = array(
+ E_ERROR => 'Error',
+ E_WARNING => 'Warning',
+ E_NOTICE => 'Notice'
);
-?>
\ No newline at end of file
require_once 'HTMLPurifier/Language.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'Language', 'en', 'string', '
+ISO 639 language code for localizable things in HTML Purifier to use,
+which is mainly error reporting. There is currently only an English (en)
+translation, so this directive is currently useless.
+This directive has been available since 2.0.0.
+');
+
/**
* Class responsible for generating HTMLPurifier_Language objects, managing
* caching and fallbacks.
* variables to slurp out of a message file.
* @value array list
*/
- var $keys = array('fallback', 'messages');
+ var $keys = array('fallback', 'messages', 'errorNames');
/**
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes
* Keys whose contents are a hash map and can be merged
* @value array lookup
*/
- var $mergeable_keys_map = array('messages' => true);
+ var $mergeable_keys_map = array('messages' => true, 'errorNames' => true);
/**
* Keys whose contents are a list and can be merged
*/
function setup() {
$this->validator = new HTMLPurifier_AttrDef_Lang();
- $this->dir = dirname(__FILE__);
+ $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
}
/**
* Creates a language object, handles class fallbacks
- * @param $code string language code
+ * @param $config Instance of HTMLPurifier_Config
+ * @param $context Instance of HTMLPurifier_Context
*/
- function create($code) {
+ function create($config, &$context) {
- $config = $context = false; // hope it doesn't use these!
- $code = $this->validator->validate($code, $config, $context);
+ // validate language code
+ $code = $this->validator->validate(
+ $config->get('Core', 'Language'), $config, $context
+ );
if ($code === false) $code = 'en'; // malformed code becomes English
$pcode = str_replace('-', '_', $code); // make valid PHP classname
// you can bypass the conditional include by loading the
// file yourself
if (file_exists($file) && !class_exists($class)) {
- include_once $file;
- }
+ include_once $file;
+ }
}
if (!class_exists($class)) {
// go fallback
- $fallback = HTMLPurifier_Language::getFallbackFor($code);
+ $fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code);
$depth++;
- $lang = Language::factory( $fallback );
+ $lang = HTMLPurifier_LanguageFactory::factory( $fallback );
$depth--;
} else {
- $lang = new $class;
+ $lang = new $class($config, $context);
}
$lang->code = $code;
// merge fallback with current language
foreach ( $this->keys as $key ) {
- if (isset($cache[$key]) && isset($fallback_cache[$key])) {
+ if (isset($cache[$key]) && isset($fallback_cache[$key])) {
if (isset($this->mergeable_keys_map[$key])) {
$cache[$key] = $cache[$key] + $fallback_cache[$key];
} elseif (isset($this->mergeable_keys_list[$key])) {
$cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
}
- } else {
- $cache[$key] = $fallback_cache[$key];
- }
+ } else {
+ $cache[$key] = $fallback_cache[$key];
+ }
}
}
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/Encoder.php';
require_once 'HTMLPurifier/EntityParser.php';
+// implementations
+require_once 'HTMLPurifier/Lexer/DirectLex.php';
+if (version_compare(PHP_VERSION, "5", ">=")) {
+ // You can remove the if statement if you are running PHP 5 only.
+ // We ought to get the strict version to follow those rules.
+ require_once 'HTMLPurifier/Lexer/DOMLex.php';
+}
+
HTMLPurifier_ConfigSchema::define(
'Core', 'AcceptFullDocuments', true, 'bool',
'This parameter determines whether or not the filter should accept full '.
'drop all sections except the content between body.'
);
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'LexerImpl', null, 'mixed/null', '
+<p>
+ This parameter determines what lexer implementation can be used. The
+ valid values are:
+</p>
+<dl>
+ <dt><em>null</em></dt>
+ <dd>
+ Recommended, the lexer implementation will be auto-detected based on
+ your PHP-version and configuration.
+ </dd>
+ <dt><em>string</em> lexer identifier</dt>
+ <dd>
+ This is a slim way of manually overridding the implementation.
+ Currently recognized values are: DOMLex (the default PHP5 implementation)
+ and DirectLex (the default PHP4 implementation). Only use this if
+ you know what you are doing: usually, the auto-detection will
+ manage things for cases you aren\'t even aware of.
+ </dd>
+ <dt><em>object</em> lexer instance</dt>
+ <dd>
+ Super-advanced: you can specify your own, custom, implementation that
+ implements the interface defined by <code>HTMLPurifier_Lexer</code>.
+ I may remove this option simply because I don\'t expect anyone
+ to use it.
+ </dd>
+</dl>
+<p>
+ This directive has been available since 2.0.0.
+</p>
+'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'MaintainLineNumbers', null, 'bool/null', '
+<p>
+ If true, HTML Purifier will add line number information to all tokens.
+ This is useful when error reporting is turned on, but can result in
+ significant performance degradation and should not be used when
+ unnecessary. This directive must be used with the DirectLex lexer,
+ as the DOMLex lexer does not (yet) support this functionality.
+ If the value is null, an appropriate value will be selected based
+ on other configuration. This directive has been available since 2.0.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'AggressivelyFixLt', false, 'bool', '
+This directive enables aggressive pre-filter fixes HTML Purifier can
+perform in order to ensure that open angled-brackets do not get killed
+during parsing stage. Enabling this will result in two preg_replace_callback
+calls and one preg_replace call for every bit of HTML passed through here.
+It is not necessary and will have no effect for PHP 4.
+This directive has been available since 2.1.0.
+');
+
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
class HTMLPurifier_Lexer
{
+ // -- STATIC ----------------------------------------------------------
+
+    /**
+     * Factory that picks and instantiates a concrete lexer.
+     *
+     * The implementation is read from %Core.LexerImpl. When that is null
+     * it is auto-detected: DirectLex if line numbers are wanted
+     * (%Core.MaintainLineNumbers is true, or it is null while
+     * %Core.CollectErrors is on), otherwise DOMLex on PHP 5 and beyond
+     * when the DOMDocument class exists, and DirectLex in all other cases.
+     *
+     * @static
+     *
+     * @note The behavior of this class has changed, rather than accepting
+     *       a prototype object, it now accepts a configuration object.
+     *       To specify your own prototype, set %Core.LexerImpl to it.
+     *       This change in behavior de-singletonizes the lexer object.
+     *
+     * @note In PHP4, it is possible to call this factory method from
+     *       subclasses, such usage is not recommended and not
+     *       forwards-compatible.
+     *
+     * @param $config Instance of HTMLPurifier_Config. Anything else is
+     *        treated as a deprecated prototype and raises E_USER_WARNING.
+     * @return Concrete lexer. Nothing is returned after the E_USER_ERROR
+     *         raised for an unrecognized implementation name.
+     */
+    function create($config) {
+
+        if (is_a($config, 'HTMLPurifier_Config')) {
+            $impl = $config->get('Core', 'LexerImpl');
+        } else {
+            $impl = $config;
+            trigger_error("Passing a prototype to
+ HTMLPurifier_Lexer::create() is deprecated, please instead
+ use %Core.LexerImpl", E_USER_WARNING);
+        }
+
+        // a ready-made lexer object is handed back as-is
+        if (is_object($impl)) {
+            return $impl;
+        }
+
+        if ($impl === null) {
+            // auto-detection algorithm
+
+            // once PHP DOM implements native line numbers, or we
+            // hack out something using XSLT, remove this stipulation
+            $line_numbers = $config->get('Core', 'MaintainLineNumbers');
+            $needs_lines =
+                $line_numbers === true ||
+                ($line_numbers === null && $config->get('Core', 'CollectErrors'));
+
+            if ($needs_lines) {
+                $impl = 'DirectLex';
+            } elseif (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
+                      class_exists('DOMDocument')) {             // check for DOM support
+                $impl = 'DOMLex';
+            } else {
+                $impl = 'DirectLex';
+            }
+        }
+
+        // instantiate recognized string names; the comparison is loose,
+        // exactly as a switch statement would perform it
+        if ($impl == 'DOMLex') {
+            return new HTMLPurifier_Lexer_DOMLex();
+        }
+        if ($impl == 'DirectLex') {
+            return new HTMLPurifier_Lexer_DirectLex();
+        }
+        trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($impl), E_USER_ERROR);
+
+    }
+
+ // -- CONVENIENCE MEMBERS ---------------------------------------------
+
function HTMLPurifier_Lexer() {
$this->_entity_parser = new HTMLPurifier_EntityParser();
}
-
/**
* Most common entity to raw value conversion table for special entities.
* @protected
trigger_error('Call to abstract class', E_USER_ERROR);
}
- /**
- * Retrieves or sets the default Lexer as a Prototype Factory.
- *
- * Depending on what PHP version you are running, the abstract base
- * Lexer class will determine which concrete Lexer is best for you:
- * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
- * for PHP 5 and beyond.
- *
- * Passing the optional prototype lexer parameter will override the
- * default with your own implementation. A copy/reference of the prototype
- * lexer will now be returned when you request a new lexer.
- *
- * @static
- *
- * @note
- * Though it is possible to call this factory method from subclasses,
- * such usage is not recommended.
- *
- * @param $prototype Optional prototype lexer.
- * @return Concrete lexer.
- */
- function create($prototype = null) {
- // we don't really care if it's a reference or a copy
- static $lexer = null;
- if ($prototype) {
- $lexer = $prototype;
- }
- if (empty($lexer)) {
- if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
- class_exists('DOMDocument')) { // check for DOM support
- require_once 'HTMLPurifier/Lexer/DOMLex.php';
- $lexer = new HTMLPurifier_Lexer_DOMLex();
- } else {
- require_once 'HTMLPurifier/Lexer/DirectLex.php';
- $lexer = new HTMLPurifier_Lexer_DirectLex();
- }
- }
- return $lexer;
- }
-
/**
* Translates CDATA sections into regular sections (through escaping).
*
*/
function escapeCDATA($string) {
return preg_replace_callback(
- '/<!\[CDATA\[(.+?)\]\]>/',
+ '/<!\[CDATA\[(.+?)\]\]>/s',
+ array('HTMLPurifier_Lexer', 'CDATACallback'),
+ $string
+ );
+ }
+
+ /**
+ * Special CDATA case that is especially convoluted for <script>
+ */
+ function escapeCommentedCDATA($string) {
+ return preg_replace_callback(
+ '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
array('HTMLPurifier_Lexer', 'CDATACallback'),
$string
);
$html = $this->extractBody($html);
}
+ // normalize newlines to \n
+ $html = str_replace("\r\n", "\n", $html);
+ $html = str_replace("\r", "\n", $html);
+
+ if ($config->get('HTML', 'Trusted')) {
+ // escape convoluted CDATA
+ $html = $this->escapeCommentedCDATA($html);
+ }
+
// escape CDATA
$html = $this->escapeCDATA($html);
}
-?>
\ No newline at end of file
$this->factory = new HTMLPurifier_TokenFactory();
}
- public function tokenizeHTML($string, $config, &$context) {
+ public function tokenizeHTML($html, $config, &$context) {
- $string = $this->normalize($string, $config, $context);
+ $html = $this->normalize($html, $config, $context);
- // preprocess string, essential for UTF-8
- $string =
+        // attempt to armor stray angled brackets that cannot possibly
+        // form tags and thus are probably being used as emoticons
+        if ($config->get('Core', 'AggressivelyFixLt')) {
+            $char = '[^a-z!\/]';
+            $comment = "/<!--(.*?)(-->|\z)/is";
+            // first pass: hand every comment to callbackArmorCommentEntities
+            $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
+            // escape each < that is not followed by a letter, ! or /, i.e.
+            // one that cannot open a tag, comment or end tag; the
+            // replacement must be the entity, a bare < would be a no-op
+            $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
+            // last pass: hand every comment to callbackUndoCommentSubst
+            $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
+        }
+
+ // preprocess html, essential for UTF-8
+ $html =
'<!DOCTYPE html '.
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
'<html><head>'.
'<meta http-equiv="Content-Type" content="text/html;'.
' charset=utf-8" />'.
- '</head><body><div>'.$string.'</div></body></html>';
+ '</head><body><div>'.$html.'</div></body></html>';
$doc = new DOMDocument();
- $doc->encoding = 'UTF-8'; // technically does nothing, but whatever
+ $doc->encoding = 'UTF-8'; // theoretically, the above has this covered
- // DOM will toss errors if the HTML its parsing has really big
- // problems, so we're going to mute them. This can cause problems
- // if a custom error handler that doesn't implement error_reporting
- // is set, as noted by a Drupal plugin of HTML Purifier. Consider
- // making our own error reporter to temporarily load in
- @$doc->loadHTML($string);
+ set_error_handler(array($this, 'muteErrorHandler'));
+ $doc->loadHTML($html);
+ restore_error_handler();
$tokens = array();
$this->tokenizeDOM(
- $doc->getElementsByTagName('html')->item(0)-> // html
- getElementsByTagName('body')->item(0)-> // body
- getElementsByTagName('div')->item(0) // div
+ $doc->getElementsByTagName('html')->item(0)-> // <html>
+ getElementsByTagName('body')->item(0)-> // <body>
+ getElementsByTagName('div')->item(0) // <div>
, $tokens);
return $tokens;
}
* @returns Tokens of node appended to previously passed tokens.
*/
protected function tokenizeDOM($node, &$tokens, $collect = false) {
- // recursive goodness!
// intercept non element nodes. WE MUST catch all of them,
// but we're not getting the character reference nodes because
// those should have been preprocessed
- if ($node->nodeType === XML_TEXT_NODE ||
- $node->nodeType === XML_CDATA_SECTION_NODE) {
+ if ($node->nodeType === XML_TEXT_NODE) {
$tokens[] = $this->factory->createText($node->data);
return;
+ } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
+ // undo DOM's special treatment of <script> tags
+ $tokens[] = $this->factory->createText($this->parseData($node->data));
+ return;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
$tokens[] = $this->factory->createComment($node->data);
return;
return $array;
}
+ /**
+ * An error handler that mutes all errors: its body is empty, so whatever is reported to it is discarded
+ */
+ public function muteErrorHandler($errno, $errstr) {}
+
+    /**
+     * Callback function for undoing escaping of stray angled brackets
+     * in comments: turns the ampersand and less-than entities inside the
+     * comment body back into the raw characters. It is the inverse of
+     * callbackArmorCommentEntities followed by less-than escaping.
+     * @param $matches Match array where [1] is the comment body and [2]
+     *        is whatever terminated it
+     * @return string Rebuilt comment
+     */
+    function callbackUndoCommentSubst($matches) {
+        // strtr() makes a single pass, so an armored "&amp;lt;" comes
+        // back as "&lt;" and is not decoded a second time
+        return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2];
+    }
+
+    /**
+     * Callback function that entity-izes ampersands in comments so that
+     * callbackUndoCommentSubst doesn't clobber them
+     * @param $matches Match array where [1] is the comment body and [2]
+     *        is whatever terminated it
+     * @return string Rebuilt comment with every ampersand written as an entity
+     */
+    function callbackArmorCommentEntities($matches) {
+        return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
+    }
+
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/Lexer.php';
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'DirectLexLineNumberSyncInterval', 0, 'int', '
+<p>
+ Specifies the number of tokens the DirectLex line number tracking
+ implementations should process before attempting to resyncronize the
+ current line count by manually counting all previous new-lines. When
+ at 0, this functionality is disabled. Lower values will decrease
+ performance, and this is only strictly necessary if the counting
+ algorithm is buggy (in which case you should report it as a bug).
+ This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is
+ not being used. This directive has been available since 2.0.0.
+</p>
+');
+
/**
* Our in-house implementation of a parser.
*
* A pure PHP parser, DirectLex has absolutely no dependencies, making
* it a reasonably good default for PHP4. Written with efficiency in mind,
* it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
- * pales in comparison to HTMLPurifier_Lexer_DOMLex. It will support UTF-8
- * completely eventually.
+ * pales in comparison to HTMLPurifier_Lexer_DOMLex.
*
* @todo Reread XML spec and document differences.
- *
- * @todo Determine correct behavior in transforming comment data. (preserve dashes?)
*/
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
{
*/
var $_whitespace = "\x20\x09\x0D\x0A";
+    /**
+     * Callback function for script CDATA fudge: HTML-escapes the contents
+     * captured between the script tags and leaves the tags themselves alone
+     * @param $matches, in form of array(whole match, opening tag, contents, closing tag)
+     * @return string Opening tag, escaped contents and closing tag joined together
+     * @static
+     */
+    function scriptCallback($matches) {
+        $escaped = htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8');
+        return $matches[1] . $escaped . $matches[3];
+    }
+
function tokenizeHTML($html, $config, &$context) {
+ // special normalization for script tags without any armor
+ // our "armor" heuristic is a < sign any number of whitespaces after
+ // the first script tag
+ if ($config->get('HTML', 'Trusted')) {
+ $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
+ array('HTMLPurifier_Lexer_DirectLex', 'scriptCallback'), $html);
+ }
+
$html = $this->normalize($html, $config, $context);
$cursor = 0; // our location in the text
$inside_tag = false; // whether or not we're parsing the inside of a tag
$array = array(); // result array
+ $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
+
+ if ($maintain_line_numbers === null) {
+ // automatically determine line numbering by checking
+ // if error collection is on
+ $maintain_line_numbers = $config->get('Core', 'CollectErrors');
+ }
+
+ if ($maintain_line_numbers) $current_line = 1;
+ else $current_line = false;
+ $context->register('CurrentLine', $current_line);
+ $nl = "\n";
+ // how often to manually recalculate. This will ALWAYS be right,
+ // but it's pretty wasteful. Set to 0 to turn off
+ $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
+
+ $e = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+
// infinite loop protection
// has to be pretty big, since html docs can be big
// we're allow two hundred thousand tags... more than enough?
+ // NOTE: this is also used for synchronization, so watch out
$loops = 0;
while(true) {
// infinite loop protection
if (++$loops > 200000) return array();
+ // recalculate lines
+ if (
+ $maintain_line_numbers && // line number tracking is on
+ $synchronize_interval && // synchronization is on
+ $cursor > 0 && // cursor is further than zero
+ $loops % $synchronize_interval === 0 // time to synchronize!
+ ) {
+ $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
+ }
+
$position_next_lt = strpos($html, '<', $cursor);
$position_next_gt = strpos($html, '>', $cursor);
// triggers on "<b>asdf</b>" but not "asdf <b></b>"
+ // special case to set up context
if ($position_next_lt === $cursor) {
$inside_tag = true;
$cursor++;
if (!$inside_tag && $position_next_lt !== false) {
// We are not inside tag and there still is another tag to parse
- $array[] = new
+ $token = new
HTMLPurifier_Token_Text(
$this->parseData(
substr(
)
)
);
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
+ }
+ $array[] = $token;
$cursor = $position_next_lt + 1;
$inside_tag = true;
continue;
// If we're already at the end, break
if ($cursor === strlen($html)) break;
// Create Text of rest of string
- $array[] = new
+ $token = new
HTMLPurifier_Token_Text(
$this->parseData(
substr(
)
)
);
+ if ($maintain_line_numbers) $token->line = $current_line;
+ $array[] = $token;
break;
} elseif ($inside_tag && $position_next_gt !== false) {
// We are in tag and it is well formed
// Grab the internals of the tag
$strlen_segment = $position_next_gt - $cursor;
+
+ if ($strlen_segment < 1) {
+ // there's nothing to process!
+ $token = new HTMLPurifier_Token_Text('<');
+ $cursor++;
+ continue;
+ }
+
$segment = substr($html, $cursor, $strlen_segment);
// Check if it's a comment
if (
- substr($segment, 0, 3) == '!--' &&
- substr($segment, $strlen_segment-2, 2) == '--'
+ substr($segment, 0, 3) == '!--'
) {
- $array[] = new
+ // re-determine segment length, looking for -->
+ $position_comment_end = strpos($html, '-->', $cursor);
+ if ($position_comment_end === false) {
+ // uh oh, we have a comment that extends to
+ // infinity. Can't be helped: set comment
+ // end position to end of string
+ if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
+ $position_comment_end = strlen($html);
+ $end = true;
+ } else {
+ $end = false;
+ }
+ $strlen_segment = $position_comment_end - $cursor;
+ $segment = substr($html, $cursor, $strlen_segment);
+ $token = new
HTMLPurifier_Token_Comment(
substr(
- $segment, 3, $strlen_segment - 5
+ $segment, 3, $strlen_segment - 3
)
);
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
+ }
+ $array[] = $token;
+ $cursor = $end ? $position_comment_end : $position_comment_end + 3;
$inside_tag = false;
- $cursor = $position_next_gt + 1;
continue;
}
$is_end_tag = (strpos($segment,'/') === 0);
if ($is_end_tag) {
$type = substr($segment, 1);
- $array[] = new HTMLPurifier_Token_End($type);
+ $token = new HTMLPurifier_Token_End($type);
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+ }
+ $array[] = $token;
$inside_tag = false;
$cursor = $position_next_gt + 1;
continue;
// Check leading character is alnum, if not, we may
// have accidently grabbed an emoticon. Translate into
// text and go our merry way
- if (!ctype_alnum($segment[0])) {
- $array[] = new
+ if (!ctype_alpha($segment[0])) {
+ // XML: $segment[0] !== '_' && $segment[0] !== ':'
+ if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
+ $token = new
HTMLPurifier_Token_Text(
'<' .
$this->parseData(
) .
'>'
);
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+ }
+ $array[] = $token;
$cursor = $position_next_gt + 1;
$inside_tag = false;
continue;
if ($position_first_space >= $strlen_segment) {
if ($is_self_closing) {
- $array[] = new HTMLPurifier_Token_Empty($segment);
+ $token = new HTMLPurifier_Token_Empty($segment);
} else {
- $array[] = new HTMLPurifier_Token_Start($segment);
+ $token = new HTMLPurifier_Token_Start($segment);
+ }
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
}
+ $array[] = $token;
$inside_tag = false;
$cursor = $position_next_gt + 1;
continue;
}
if ($is_self_closing) {
- $array[] = new HTMLPurifier_Token_Empty($type, $attr);
+ $token = new HTMLPurifier_Token_Empty($type, $attr);
} else {
- $array[] = new HTMLPurifier_Token_Start($type, $attr);
+ $token = new HTMLPurifier_Token_Start($type, $attr);
}
+ if ($maintain_line_numbers) {
+ $token->line = $current_line;
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+ }
+ $array[] = $token;
$cursor = $position_next_gt + 1;
$inside_tag = false;
continue;
} else {
- $array[] = new
+ // inside tag, but there's no ending > sign
+ if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
+ $token = new
HTMLPurifier_Token_Text(
'<' .
$this->parseData(
substr($html, $cursor)
)
);
+ if ($maintain_line_numbers) $token->line = $current_line;
+ // no cursor scroll? Hmm...
+ $array[] = $token;
break;
}
break;
}
+
+ $context->destroy('CurrentLine');
return $array;
}
+    /**
+     * Counts how often a substring occurs inside a window of a string.
+     *
+     * PHP 4 compatible stand-in for substr_count() called with an offset
+     * and a length; on PHP 5.1 and later the native call is used directly.
+     *
+     * @param $haystack String to search in
+     * @param $needle   Substring to count
+     * @param $offset   Integer position at which the window starts
+     * @param $length   Integer size of the window
+     * @return Integer number of occurrences of $needle inside the window
+     */
+    function substrCount($haystack, $needle, $offset, $length) {
+        // the version comparison is done on the first call and then cached
+        static $nativeWindow;
+        if ($nativeWindow === null) {
+            $nativeWindow = version_compare(PHP_VERSION, '5.1', '>=');
+        }
+        if ($nativeWindow) {
+            return substr_count($haystack, $needle, $offset, $length);
+        }
+        // older PHP: cut the window out by hand, then count inside it
+        return substr_count(substr($haystack, $offset, $length), $needle);
+    }
+
/**
* Takes the inside of an HTML tag and makes an assoc array of attributes.
*
if ($string == '') return array(); // no attributes
+ $e = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+
// let's see if we can abort as quickly as possible
// one equal sign, no spaces => one attribute
$num_equal = substr_count($string, '=');
// only one attribute
list($key, $quoted_value) = explode('=', $string);
$quoted_value = trim($quoted_value);
- if (!$key) return array();
+ if (!$key) {
+ if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+ return array();
+ }
if (!$quoted_value) return array($key => '');
$first_char = @$quoted_value[0];
$last_char = @$quoted_value[strlen($quoted_value)-1];
} else {
// not well behaved
if ($open_quote) {
+ if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
$value = substr($quoted_value, 1);
} else {
$value = $quoted_value;
}
}
+ if ($value === false) $value = '';
return array($key => $value);
}
// infinite loop protection
$loops = 0;
-
while(true) {
// infinite loop protection
- if (++$loops > 1000) return array();
+ if (++$loops > 1000) {
+ trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
+ return array();
+ }
if ($cursor >= $size) {
break;
}
$cursor += ($value = strspn($string, $this->_whitespace, $cursor));
-
// grab the key
$key_begin = $cursor; //we're currently at the start of the key
$key = substr($string, $key_begin, $key_end - $key_begin);
- if (!$key) continue; // empty key
+ if (!$key) {
+ if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+ $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
+ continue; // empty key
+ }
// scroll past all whitespace
$cursor += strspn($string, $this->_whitespace, $cursor);
$cursor++;
$cursor += strspn($string, $this->_whitespace, $cursor);
+ if ($cursor === false) {
+ $array[$key] = '';
+ break;
+ }
+
// we might be in front of a quote right now
$char = @$string[$cursor];
$value_end = $cursor;
}
+ // we reached a premature end
+ if ($cursor === false) {
+ $cursor = $size;
+ $value_end = $cursor;
+ }
+
$value = substr($string, $value_begin, $value_end - $value_begin);
+ if ($value === false) $value = '';
$array[$key] = $this->parseData($value);
$cursor++;
// boolattr
if ($key !== '') {
$array[$key] = $key;
+ } else {
+ // purely theoretical
+ if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
}
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/Token.php';
require_once 'HTMLPurifier/Encoder.php';
+// OUT OF DATE, NEEDS UPDATING!
+
class HTMLPurifier_Printer
{
$this->generator = new HTMLPurifier_Generator();
}
+    /**
+     * Give generator necessary configuration if possible
+     *
+     * Runs the generator once over an empty token list with the given
+     * configuration (presumably so that it picks up its settings from
+     * $config -- confirm against HTMLPurifier_Generator).
+     *
+     * @param $config Configuration object handed to the generator
+     */
+    function prepareGenerator($config) {
+        // hack for smoketests/configForm.php: do nothing when the
+        // configuration carries no HTML namespace
+        $has_html_namespace = !empty($config->conf['HTML']);
+        if ($has_html_namespace) {
+            $context = new HTMLPurifier_Context();
+            $this->generator->generateFromTokens(array(), $config, $context);
+        }
+    }
+
/**
* Main function that renders object or aspect of that object
- * @param $config Configuration object
+ * @note Parameters vary depending on printer
*/
- function render($config) {}
+ // function render() {}
/**
* Returns a start tag
$this->end($tag);
}
+    /**
+     * Returns the generator output for an empty-element token
+     * @param $tag  Tag name
+     * @param $attr Attribute array
+     */
+    function elementEmpty($tag, $attr = array()) {
+        $token = new HTMLPurifier_Token_Empty($tag, $attr);
+        return $this->generator->generateFromToken($token);
+    }
+
+    /**
+     * Returns the generator output for a text token
+     * @param $text String content of the text token
+     */
+    function text($text) {
+        $token = new HTMLPurifier_Token_Text($text);
+        return $this->generator->generateFromToken($token);
+    }
+
/**
* Prints a simple key/value row in a table.
* @param $name Key
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
--- /dev/null
+
+/* Root of the configuration form table (rendered with class "hp-config") */
+.hp-config {}
+
+/* Header cells in the table body: right-aligned, with a gap before the value cell */
+.hp-config tbody th {text-align:right; padding-right:0.5em;}
+/* Table head and namespace sections share a white-on-blue colour scheme */
+.hp-config thead, .hp-config .namespace {background:#3C578C; color:#FFF;}
+/* Namespace heading cells are centred */
+.hp-config .namespace th {text-align:center;}
+/* Elements marked "verbose" are not displayed */
+.hp-config .verbose {display:none;}
+/* Cell holding the form controls is centred */
+.hp-config .controls {text-align:center;}
--- /dev/null
+function toggleWriteability(id_of_patient, checked) {
+ document.getElementById(id_of_patient).disabled = checked;
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Printer.php';
+
+/**
+ * Printer that renders a configuration object as an HTML form table,
+ * one row per directive, grouped by namespace.
+ */
+class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
+{
+
+    /**
+     * Field printers keyed by directive type; 'default' is the fallback
+     * @protected
+     */
+    var $fields = array();
+
+    /**
+     * Documentation URL; every %s in it is replaced by the directive name
+     * @protected
+     */
+    var $docURL;
+
+    /**
+     * Name of the form element the directive fields are stuffed into
+     * @protected
+     */
+    var $name;
+
+    /**
+     * Directive name length from which labels are cropped, or false to
+     * never crop
+     */
+    var $compress = false;
+
+    /**
+     * @param $name Form element name for directives to be stuffed into
+     * @param $doc_url String documentation URL, will have fragment tagged on
+     * @param $compress Integer max length before compressing a directive name, set to false to turn off
+     */
+    function HTMLPurifier_Printer_ConfigForm(
+        $name, $doc_url = null, $compress = false
+    ) {
+        parent::HTMLPurifier_Printer();
+        $this->name     = $name;
+        $this->docURL   = $doc_url;
+        $this->compress = $compress;
+        // one printer for booleans, one catch-all for every other type
+        $this->fields['default'] = new HTMLPurifier_Printer_ConfigForm_default();
+        $this->fields['bool']    = new HTMLPurifier_Printer_ConfigForm_bool();
+    }
+
+    /**
+     * Overrides the textarea size used by the default field printer
+     * @param $cols Integer columns of textarea, null to use default
+     * @param $rows Integer rows of textarea, null to use default
+     */
+    function setTextareaDimensions($cols = null, $rows = null) {
+        if ($cols) {
+            $this->fields['default']->cols = $cols;
+        }
+        if ($rows) {
+            $this->fields['default']->rows = $rows;
+        }
+    }
+
+    /**
+     * Retrieves the stylesheet contents, in case the directory it lives
+     * in is not publicly available
+     */
+    function getCSS() {
+        $path = HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css';
+        return file_get_contents($path);
+    }
+
+    /**
+     * Retrieves the JavaScript contents, in case the directory is not public
+     */
+    function getJavaScript() {
+        $path = HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js';
+        return file_get_contents($path);
+    }
+
+    /**
+     * Returns HTML output for a configuration form
+     * @param $config Configuration object of current form state
+     * @param $allowed Optional namespace(s) and directives to restrict form to.
+     * @param $render_controls Whether to append a table footer holding the
+     *        submit button and the reset link
+     */
+    function render($config, $allowed = true, $render_controls = true) {
+        $this->config = $config;
+        $this->prepareGenerator($config);
+
+        // bucket the current value of every permitted directive by namespace
+        $grouped = array();
+        $permitted = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
+        foreach ($permitted as $pair) {
+            list($ns, $directive) = $pair;
+            $grouped[$ns][$directive] = $config->get($ns, $directive);
+        }
+
+        // table head
+        $html = $this->start('table', array('class' => 'hp-config'))
+              . $this->start('thead')
+              . $this->start('tr')
+              . $this->element('th', 'Directive')
+              . $this->element('th', 'Value')
+              . $this->end('tr')
+              . $this->end('thead');
+
+        // one pair of table bodies per namespace
+        foreach ($grouped as $ns => $directives) {
+            $html .= $this->renderNamespace($ns, $directives);
+        }
+
+        // optional footer with the form controls
+        if ($render_controls) {
+            $html .= $this->start('tfoot')
+                   . $this->start('tr')
+                   . $this->start('td', array('colspan' => 2, 'class' => 'controls'))
+                   . $this->elementEmpty('input', array('type' => 'Submit', 'value' => 'Submit'))
+                   . '[<a href="?">Reset</a>]'
+                   . $this->end('td')
+                   . $this->end('tr')
+                   . $this->end('tfoot');
+        }
+
+        return $html . $this->end('table');
+    }
+
+    /**
+     * Renders a single namespace
+     * @param $ns String namespace name
+     * @param $directives Associative array of directives to values
+     * @protected
+     */
+    function renderNamespace($ns, $directives) {
+        // heading row for the namespace, in a table body of its own
+        $html = $this->start('tbody', array('class' => 'namespace'))
+              . $this->start('tr')
+              . $this->element('th', $ns, array('colspan' => 2))
+              . $this->end('tr')
+              . $this->end('tbody');
+
+        $html .= $this->start('tbody');
+        foreach ($directives as $directive => $value) {
+            $html .= $this->start('tr');
+
+            // header cell: the label, wrapped in a documentation link
+            // when a documentation URL was supplied
+            $html .= $this->start('th');
+            if ($this->docURL) {
+                $url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL);
+                $html .= $this->start('a', array('href' => $url));
+            }
+            $label_attr = array('for' => "{$this->name}:$ns.$directive");
+
+            // crop directive name if it's too long; the full name then
+            // moves into the title attribute
+            $crop = $this->compress && strlen($directive) >= $this->compress;
+            if ($crop) {
+                $shown = substr($directive, 0, $this->compress - 2) . '...';
+                $label_attr['title'] = $directive;
+            } else {
+                $shown = $directive;
+            }
+
+            // component printers must create an element with the id
+            // named in the label's "for" attribute
+            $html .= $this->element('label', $shown, $label_attr);
+            if ($this->docURL) {
+                $html .= $this->end('a');
+            }
+            $html .= $this->end('th');
+
+            // value cell: delegate to the printer registered for the
+            // directive's type, falling back to the default printer
+            $html .= $this->start('td');
+            $def = $this->config->def->info[$ns][$directive];
+            $type = isset($this->fields[$def->type]) ? $def->type : 'default';
+            $printer = $this->fields[$type];
+            if ($def->allow_null) {
+                $printer = new HTMLPurifier_Printer_ConfigForm_NullDecorator($printer);
+            }
+            $html .= $printer->render($ns, $directive, $value, $this->name, $this->config);
+            $html .= $this->end('td');
+
+            $html .= $this->end('tr');
+        }
+
+        return $html . $this->end('tbody');
+    }
+
+}
+
+/**
+ * Printer decorator for directives that accept null: puts a
+ * "Null/Disabled" checkbox in front of the decorated printer's output
+ */
+class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer {
+    /**
+     * Printer being decorated
+     */
+    var $obj;
+    /**
+     * @param $obj Printer to decorate
+     */
+    function HTMLPurifier_Printer_ConfigForm_NullDecorator($obj) {
+        parent::HTMLPurifier_Printer();
+        $this->obj = $obj;
+    }
+    /**
+     * Renders the null toggle followed by the decorated field
+     * @param $ns String namespace name
+     * @param $directive String directive name
+     * @param $value Current value; null renders the checkbox as checked
+     * @param $name Form element name the fields are nested under
+     * @param $config Configuration object
+     */
+    function render($ns, $directive, $value, $name, $config) {
+        $this->prepareGenerator($config);
+
+        // the label and the checkbox are tied together through this id
+        $toggle_id = "$name:Null_$ns.$directive";
+        $checkbox = array(
+            'type' => 'checkbox',
+            'value' => '1',
+            'class' => 'null-toggle',
+            'name' => "$name"."[Null_$ns.$directive]",
+            'id' => $toggle_id,
+            'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!!
+        );
+        if ($value === null) {
+            $checkbox['checked'] = 'checked';
+        }
+
+        return $this->start('label', array('for' => $toggle_id))
+             . $this->element('span', "$ns.$directive:", array('class' => 'verbose'))
+             . $this->text(' Null/Disabled')
+             . $this->end('label')
+             . $this->elementEmpty('input', $checkbox)
+             . $this->text(' or ')
+             . $this->elementEmpty('br')
+             . $this->obj->render($ns, $directive, $value, $name, $config);
+    }
+}
+
+/**
+ * Swiss-army knife configuration form field printer
+ */
+class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
+    /**
+     * Value of the cols attribute on rendered textareas
+     */
+    var $cols = 18;
+    /**
+     * Value of the rows attribute on rendered textareas
+     */
+    var $rows = 5;
+    /**
+     * Renders a form field for one directive: a select when the directive
+     * has a set of allowed values, a textarea for text-like and array
+     * types, and a single-line text input otherwise.
+     * @param $ns String namespace name
+     * @param $directive String directive name
+     * @param $value Current value; null renders the field as disabled
+     * @param $name Form element name the field is nested under
+     * @param $config Configuration object
+     * @return String HTML of the field, or 'Not supported' for directives
+     *         of type mixed
+     */
+    function render($ns, $directive, $value, $name, $config) {
+        $this->prepareGenerator($config);
+        // this should probably be split up a little
+        $ret = '';
+        $def = $config->def->info[$ns][$directive];
+        // flatten array values into text, one entry per line
+        if (is_array($value)) {
+            switch ($def->type) {
+                case 'lookup':
+                    // only the keys of a lookup are shown
+                    $value = array_keys($value);
+                    // intentional fall-through: the keys are joined like a list
+                case 'list':
+                    $value = implode(PHP_EOL, $value);
+                    break;
+                case 'hash':
+                    $nvalue = '';
+                    foreach ($value as $i => $v) {
+                        $nvalue .= "$i:$v" . PHP_EOL;
+                    }
+                    $value = $nvalue;
+                    break;
+                default:
+                    $value = '';
+            }
+        }
+        // mixed directives cannot be edited through the form
+        if ($def->type === 'mixed') {
+            return 'Not supported';
+        }
+        $attr = array(
+            'name' => "$name"."[$ns.$directive]",
+            'id' => "$name:$ns.$directive"
+        );
+        if ($value === null) $attr['disabled'] = 'disabled';
+        if (is_array($def->allowed)) {
+            // enumerated directive: one option per allowed value
+            $ret .= $this->start('select', $attr);
+            foreach ($def->allowed as $val => $b) {
+                $attr = array();
+                if ($value == $val) $attr['selected'] = 'selected';
+                $ret .= $this->element('option', $val, $attr);
+            }
+            $ret .= $this->end('select');
+        } elseif (
+            $def->type == 'text' || $def->type == 'itext' ||
+            $def->type == 'list' || $def->type == 'hash' || $def->type == 'lookup'
+        ) {
+            // multi-line content goes into a textarea
+            $attr['cols'] = $this->cols;
+            $attr['rows'] = $this->rows;
+            $ret .= $this->start('textarea', $attr);
+            $ret .= $this->text($value);
+            $ret .= $this->end('textarea');
+        } else {
+            // everything else is a plain single-line text input
+            $attr['value'] = $value;
+            $attr['type'] = 'text';
+            $ret .= $this->elementEmpty('input', $attr);
+        }
+        return $ret;
+    }
+}
+
+/**
+ * Bool form field printer: renders a Yes/No pair of radio buttons
+ */
+class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer {
+    /**
+     * Renders the radio pair inside a div carrying the field id
+     * @param $ns String namespace name
+     * @param $directive String directive name
+     * @param $value Current value; truthy selects Yes, anything else No
+     * @param $name Form element name the field is nested under
+     * @param $config Configuration object
+     */
+    function render($ns, $directive, $value, $name, $config) {
+        $this->prepareGenerator($config);
+
+        $field   = "$name"."[$ns.$directive]";
+        $caption = "$ns.$directive:";
+        // per option: label text, element id, submitted value, selected?
+        $options = array(
+            array(' Yes', "$name:Yes_$ns.$directive", '1', (bool) $value),
+            array(' No', "$name:No_$ns.$directive", '0', !$value)
+        );
+
+        $html = $this->start('div', array('id' => "$name:$ns.$directive"));
+        foreach ($options as $option) {
+            list($label_text, $id, $submitted, $selected) = $option;
+
+            $html .= $this->start('label', array('for' => $id));
+            $html .= $this->element('span', $caption, array('class' => 'verbose'));
+            $html .= $this->text($label_text);
+            $html .= $this->end('label');
+
+            $radio = array(
+                'type' => 'radio',
+                'name' => $field,
+                'id' => $id,
+                'value' => $submitted
+            );
+            if ($selected) {
+                $radio['checked'] = 'checked';
+            }
+            $html .= $this->elementEmpty('input', $radio);
+        }
+
+        return $html . $this->end('div');
+    }
+}
+
$this->config =& $config;
$this->def = $config->getHTMLDefinition();
- $def =& $this->def;
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
+
+ $ret .= $this->renderDoctype();
+ $ret .= $this->renderEnvironment();
+ $ret .= $this->renderContentSets();
+ $ret .= $this->renderInfo();
+
+ $ret .= $this->end('div');
+
+ return $ret;
+ }
+
+    /**
+     * Renders the Doctype table: name, XML flag and the default module
+     * lists of the definition's doctype
+     * @return String HTML of the table
+     */
+    function renderDoctype() {
+        $doctype = $this->def->doctype;
+        $ret = '';
+        $ret .= $this->start('table');
+        $ret .= $this->element('caption', 'Doctype');
+        $ret .= $this->row('Name', $doctype->name);
+        $ret .= $this->row('XML', $doctype->xml ? 'Yes' : 'No');
+        // separator first: the (array, separator) argument order is a
+        // legacy form that newer PHP versions no longer accept
+        $ret .= $this->row('Default Modules', implode(', ', $doctype->modules));
+        $ret .= $this->row('Default Tidy Modules', implode(', ', $doctype->tidyModules));
+        $ret .= $this->end('table');
+        return $ret;
+    }
+
+
+ /**
+ * Renders environment table, which is miscellaneous info
+ */
+ function renderEnvironment() {
+ $def = $this->def;
+
+ $ret = '';
+
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Environment');
$ret .= $this->end('tr');
$ret .= $this->end('table');
-
-
- $ret .= $this->renderInfo();
-
-
- $ret .= $this->end('div');
-
+ return $ret;
+ }
+
+    /**
+     * Renders the Content Sets table: for every content set of the
+     * definition, a heavy header row with its name followed by a row
+     * listing its tags
+     * @return String HTML of the table
+     */
+    function renderContentSets() {
+        $ret = '';
+        $ret .= $this->start('table');
+        $ret .= $this->element('caption', 'Content Sets');
+        foreach ($this->def->info_content_sets as $name => $lookup) {
+            $ret .= $this->heavyHeader($name);
+            $ret .= $this->start('tr');
+            $ret .= $this->element('td', $this->listifyTagLookup($lookup));
+            $ret .= $this->end('tr');
+        }
+        // close the table opened above so that the markup rendered after
+        // this one does not end up nested inside it
+        $ret .= $this->end('table');
return $ret;
}
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Elements ($info)');
ksort($this->def->info);
- $ret .= $this->start('tr');
- $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy'));
- $ret .= $this->end('tr');
+ $ret .= $this->heavyHeader('Allowed tags', 2);
$ret .= $this->start('tr');
$ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2));
$ret .= $this->end('tr');
foreach ($this->def->info as $name => $def) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
+ $ret .= $this->element('th', "<$name>" . ($def->safe ? '' : ' (unsafe)'), array('class'=>'heavy' . ($def->safe ? '' : ' unsafe'), 'colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Inline content');
}
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Allowed attributes');
- $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0);
+ $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0);
$ret .= $this->end('tr');
+ if (!empty($def->required_attr)) {
+ $ret .= $this->row('Required attributes', $this->listify($def->required_attr));
+ }
+
$ret .= $this->renderChildren($def->child);
}
$ret .= $this->end('table');
'<em>Inline</em>: ' .
$this->escape($this->listifyTagLookup($def->inline->elements)),0,0);
+ } elseif ($def->type == 'custom') {
+
+ $ret .= $this->element('td', '<em>'.ucfirst($def->type).'</em>: ' .
+ $def->dtd_regex);
+
} else {
$ret .= $this->element('td',
'<em>'.ucfirst($def->type).'</em>: ' .
return $this->listify($list);
}
+    /**
+     * Creates a heavy header row: a table row holding a single th cell
+     * of class "heavy"
+     * @param $text Content of the header cell
+     * @param $num  Integer number of columns the cell spans
+     */
+    function heavyHeader($text, $num = 1) {
+        $cell_attr = array('colspan' => $num, 'class' => 'heavy');
+        return $this->start('tr')
+             . $this->element('th', $text, $cell_attr)
+             . $this->end('tr');
+    }
+
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
$definition = $config->getHTMLDefinition();
// insert implicit "parent" node, will be removed at end.
- // ! we might want to move this to configuration
// DEFINITION CALL
$parent_name = $definition->info_parent;
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
$tokens[] = new HTMLPurifier_Token_End($parent_name);
- // setup the context variables
- $is_inline = false; // reference var that we alter
+ // setup the context variable 'IsInline', for chameleon processing
+ // is 'false' when we are not inline, 'true' when it must always
+ // be inline, and an integer when it is inline for a certain
+ // branch of the document tree
+ $is_inline = $definition->info_parent_def->descendants_are_inline;
$context->register('IsInline', $is_inline);
+ // setup error collector
+ $e =& $context->get('ErrorCollector', true);
+
//####################################################################//
// Loop initialization
$stack = array();
// stack that contains all elements that are excluded
- // same structure as $stack, but it is only populated when an element
- // with exclusions is processed, i.e. there won't be empty exclusions.
+ // it is organized by parent elements, similar to $stack,
+ // but it is only populated when an element with exclusions is
+ // processed, i.e. there won't be empty exclusions.
$exclude_stack = array();
+ // variable that contains the start token while we are processing
+ // nodes. This enables error reporting to do its job
+ $start_token = false;
+ $context->register('CurrentToken', $start_token);
+
//####################################################################//
// Loop
// $i is index of start token
// $j is index of end token
+ $start_token = $tokens[$i]; // to make token available via CurrentToken
+
//################################################################//
// Gather information on parent
$parent_def = $definition->info[$parent_name];
}
} else {
- // unknown info, it won't be used anyway
+ // processing as if the parent were the "root" node
+ // unknown info, it won't be used anyway. In the future,
+ // we may want to enforce one element only (this is
+ // necessary for HTML Purifier to clean entire documents)
$parent_index = $parent_name = $parent_def = null;
}
} elseif($result === false) {
// remove entire node
+ if ($e) {
+ if ($excluded) {
+ $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
+ } else {
+ $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
+ }
+ }
+
// calculate length of inner tokens and current tokens
$length = $j - $i + 1;
// current node is now the next possible start node
// unless it turns out that we need to do a double-check
+ // this is a rough heuristic that covers 100% of HTML's
+ // cases and 99% of all other cases. A child definition
+ // that would be tricked by this would be something like:
+ // ( | a b c) where it's all or nothing. Fortunately,
+ // our current implementation claims that that case would
+ // not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
// we need to do a double-check
$i = $parent_index;
// calculate length of inner tokens
$length = $j - $i - 1;
+ if ($e) {
+ if (empty($result) && $length) {
+ $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
+ } else {
+ $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
+ }
+ }
+
// perform replacement
array_splice($tokens, $i + 1, $length, $result);
// remove context variables
$context->destroy('IsInline');
+ $context->destroy('CurrentToken');
//####################################################################//
// Return
}
-?>
+
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/Generator.php';
+require_once 'HTMLPurifier/Injector/AutoParagraph.php';
+require_once 'HTMLPurifier/Injector/Linkify.php';
+require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
+
+// Declares the Custom directive of the AutoFormat namespace: type 'list',
+// empty array by default. The last argument is the directive's
+// description text.
+HTMLPurifier_ConfigSchema::define(
+ 'AutoFormat', 'Custom', array(), 'list', '
+<p>
+ This directive can be used to add custom auto-format injectors.
+ Specify an array of injector names (class name minus the prefix)
+ or concrete implementations. Injector class must exist. This directive
+ has been available since 2.0.1.
+</p>
+'
+);
+
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
+ /**
+ * Locally shared variable references
+ * @private
+ */
+ var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
+ $currentInjector, $injectors;
+
function execute($tokens, $config, &$context) {
+
$definition = $config->getHTMLDefinition();
- $generator = new HTMLPurifier_Generator();
+
+ // CurrentNesting
+ $this->currentNesting = array();
+ $context->register('CurrentNesting', $this->currentNesting);
+
+ // InputIndex
+ $this->inputIndex = false;
+ $context->register('InputIndex', $this->inputIndex);
+
+ // InputTokens
+ $context->register('InputTokens', $tokens);
+ $this->inputTokens =& $tokens;
+
+ // OutputTokens
$result = array();
- $current_nesting = array();
+ $this->outputTokens =& $result;
+
+ // %Core.EscapeInvalidTags
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
- foreach ($tokens as $token) {
- if (empty( $token->is_tag )) {
- $result[] = $token;
- continue;
+ $generator = new HTMLPurifier_Generator();
+
+ $e =& $context->get('ErrorCollector', true);
+
+ // -- begin INJECTOR --
+
+ $this->injectors = array();
+
+ $injectors = $config->getBatch('AutoFormat');
+ $custom_injectors = $injectors['Custom'];
+ unset($injectors['Custom']); // special case
+ foreach ($injectors as $injector => $b) {
+ $injector = "HTMLPurifier_Injector_$injector";
+ if (!$b) continue;
+ $this->injectors[] = new $injector;
+ }
+ foreach ($custom_injectors as $injector) {
+ if (is_string($injector)) {
+ $injector = "HTMLPurifier_Injector_$injector";
+ $injector = new $injector;
}
+ $this->injectors[] = $injector;
+ }
+
+ // array index of the injector that resulted in an array
+ // substitution. This enables processTokens() to know which
+ // injectors are affected by the added tokens and which are
+ // not (namely, the ones after the current injector are not
+ // affected)
+ $this->currentInjector = false;
+
+ // give the injectors references to the definition and context
+ // variables for performance reasons
+ foreach ($this->injectors as $i => $x) {
+ $error = $this->injectors[$i]->prepare($config, $context);
+ if (!$error) continue;
+ list($injector) = array_splice($this->injectors, $i, 1);
+ $name = $injector->name;
+ trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
+ }
+
+ // -- end INJECTOR --
+
+ $token = false;
+ $context->register('CurrentToken', $token);
+
+ for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
- // DEFINITION CALL
- $info = $definition->info[$token->name]->child;
+ // if all goes well, this token will be passed through unharmed
+ $token = $tokens[$this->inputIndex];
- // test if it claims to be a start tag but is empty
- if ($info->type == 'empty' &&
- $token->type == 'start' ) {
-
- $result[] = new HTMLPurifier_Token_Empty($token->name,
- $token->attr);
- continue;
+ foreach ($this->injectors as $i => $x) {
+ if ($x->skip > 0) $this->injectors[$i]->skip--;
}
- // test if it claims to be empty but really is a start tag
- if ($info->type != 'empty' &&
- $token->type == 'empty' ) {
-
- $result[] = new HTMLPurifier_Token_Start($token->name,
- $token->attr);
- $result[] = new HTMLPurifier_Token_End($token->name);
-
+ // quick-check: if it's not a tag, no need to process
+ if (empty( $token->is_tag )) {
+ if ($token->type === 'text') {
+ // injector handler code; duplicated for performance reasons
+ foreach ($this->injectors as $i => $x) {
+ if (!$x->skip) $x->handleText($token);
+ if (is_array($token)) {
+ $this->currentInjector = $i;
+ break;
+ }
+ }
+ }
+ $this->processToken($token, $config, $context);
continue;
}
- // automatically insert empty tags
- if ($token->type == 'empty') {
- $result[] = $token;
- continue;
- }
+ $info = $definition->info[$token->name]->child;
- // we give start tags precedence, so automatically accept unless...
- // it's one of those special cases
- if ($token->type == 'start') {
+ // quick tag checks: anything that's *not* an end tag
+ $ok = false;
+ if ($info->type == 'empty' && $token->type == 'start') {
+ // test if it claims to be a start tag but is empty
+ $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
+ $ok = true;
+ } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
+ // claims to be empty but really is a start tag
+ $token = array(
+ new HTMLPurifier_Token_Start($token->name, $token->attr),
+ new HTMLPurifier_Token_End($token->name)
+ );
+ $ok = true;
+ } elseif ($token->type == 'empty') {
+ // real empty token
+ $ok = true;
+ } elseif ($token->type == 'start') {
+ // start tag
- // if there's a parent, check for special case
- if (!empty($current_nesting)) {
+ // ...unless they also have to close their parent
+ if (!empty($this->currentNesting)) {
- $parent = array_pop($current_nesting);
- $parent_name = $parent->name;
- $parent_info = $definition->info[$parent_name];
+ $parent = array_pop($this->currentNesting);
+ $parent_info = $definition->info[$parent->name];
- if (isset($parent_info->auto_close[$token->name])) {
- $result[] = new HTMLPurifier_Token_End($parent_name);
+ // this can be replaced with a more general algorithm:
+ // if the token is not allowed by the parent, auto-close
+ // the parent
+ if (!isset($parent_info->child->elements[$token->name])) {
+ if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
+ // close the parent, then append the token
+ $result[] = new HTMLPurifier_Token_End($parent->name);
$result[] = $token;
- $current_nesting[] = $token;
+ $this->currentNesting[] = $token;
continue;
}
- $current_nesting[] = $parent; // undo the pop
+ $this->currentNesting[] = $parent; // undo the pop
}
-
- $result[] = $token;
- $current_nesting[] = $token;
+ $ok = true;
+ }
+
+ // injector handler code; duplicated for performance reasons
+ if ($ok) {
+ foreach ($this->injectors as $i => $x) {
+ if (!$x->skip) $x->handleElement($token);
+ if (is_array($token)) {
+ $this->currentInjector = $i;
+ break;
+ }
+ }
+ $this->processToken($token, $config, $context);
continue;
}
- // sanity check
+ // sanity check: we should be dealing with a closing tag
if ($token->type != 'end') continue;
- // okay, we're dealing with a closing tag
-
// make sure that we have something open
- if (empty($current_nesting)) {
+ if (empty($this->currentNesting)) {
if ($escape_invalid_tags) {
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
+ } elseif ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
}
continue;
}
// first, check for the simplest case: everything closes neatly
-
- // current_nesting is modified
- $current_parent = array_pop($current_nesting);
+ $current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$result[] = $token;
continue;
}
- // undo the array_pop
- $current_nesting[] = $current_parent;
-
// okay, so we're trying to close the wrong tag
- // scroll back the entire nest, trying to find our tag
- // feature could be to specify how far you'd like to go
- $size = count($current_nesting);
+ // undo the previous pop
+ $this->currentNesting[] = $current_parent;
+
+ // scroll back the entire nest, trying to find our tag.
+ // (feature could be to specify how far you'd like to go)
+ $size = count($this->currentNesting);
// -2 because -1 is the last element, but we already checked that
$skipped_tags = false;
for ($i = $size - 2; $i >= 0; $i--) {
- if ($current_nesting[$i]->name == $token->name) {
+ if ($this->currentNesting[$i]->name == $token->name) {
// current nesting is modified
- $skipped_tags = array_splice($current_nesting, $i);
+ $skipped_tags = array_splice($this->currentNesting, $i);
break;
}
}
- // we still didn't find the tag, so translate to text
+ // we still didn't find the tag, so remove
if ($skipped_tags === false) {
if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
+ } elseif ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
}
continue;
}
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
$size = count($skipped_tags);
- for ($i = $size - 1; $i >= 0; $i--) {
+ for ($i = $size - 1; $i > 0; $i--) {
+ if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
+ }
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
- // done!
+ $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
- // we're at the end now, fix all still unclosed tags
+ $context->destroy('CurrentNesting');
+ $context->destroy('InputTokens');
+ $context->destroy('InputIndex');
+ $context->destroy('CurrentToken');
- if (!empty($current_nesting)) {
- $size = count($current_nesting);
+ // we're at the end now, fix all still unclosed tags
+ // not using processToken() because at this point we don't
+ // care about current nesting
+ if (!empty($this->currentNesting)) {
+ $size = count($this->currentNesting);
for ($i = $size - 1; $i >= 0; $i--) {
+ if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
+ }
$result[] =
- new HTMLPurifier_Token_End($current_nesting[$i]->name);
+ new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
}
}
+ unset($this->outputTokens, $this->injectors, $this->currentInjector,
+ $this->currentNesting, $this->inputTokens, $this->inputIndex);
+
return $result;
}
+ /**
+ * Commits the result of handling one token.
+ *
+ * - If $token is an array (an injector or the empty-tag fixup replaced
+ * the current token with several), the array is spliced into
+ * $this->inputTokens in place of the element at $this->inputIndex,
+ * the index is stepped back by one, and the skip counter of every
+ * injector from 0 up to and including $this->currentInjector is
+ * raised by count($token) + 1.
+ * - If $token is any other truthy value it is appended to
+ * $this->outputTokens, and $this->currentNesting is pushed (start
+ * tag) or popped (end tag).
+ * - A falsy $token is ignored.
+ *
+ * @param $token Token object, array of replacement tokens, or falsy value
+ * @param $config Not referenced by this method
+ * @param $context Not referenced by this method
+ */
+ function processToken($token, $config, &$context) {
+ if (is_array($token)) {
+ // the original token was overloaded by an injector, time
+ // to do some fancy acrobatics
+
+ // $this->inputIndex is decremented so that the entire set gets
+ // re-processed
+ // (post-decrement: the splice itself still targets the current index)
+ array_splice($this->inputTokens, $this->inputIndex--, 1, $token);
+
+ // adjust the injector skips based on the array substitution
+ if ($this->injectors) {
+ // NOTE(review): presumably the extra 1 accounts for the index
+ // step-back above -- confirm against the main loop
+ $offset = count($token) + 1;
+ for ($i = 0; $i <= $this->currentInjector; $i++) {
+ $this->injectors[$i]->skip += $offset;
+ }
+ }
+ } elseif ($token) {
+ // regular case
+ $this->outputTokens[] = $token;
+ if ($token->type == 'start') {
+ $this->currentNesting[] = $token;
+ } elseif ($token->type == 'end') {
+ array_pop($this->currentNesting); // not actually used
+ }
+ }
+ }
+
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/TagTransform.php';
+require_once 'HTMLPurifier/AttrValidator.php';
+
+// Configuration directives read by the foreign-element removal strategy.
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'RemoveInvalidImg', true, 'bool', '
+<p>
+ This directive enables pre-emptive URI checking in <code>img</code>
+ tags, as the attribute validation strategy is not authorized to
+ remove elements from the document. This directive has been available
+ since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
+</p>
+'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'Core', 'RemoveScriptContents', null, 'bool/null', '
+<p>
+ This directive enables HTML Purifier to remove not only script tags
+ but all of their contents. This directive has been deprecated since 2.1.0,
+ and when not set the value of %Core.HiddenElements will take
+ precedence. This directive has been available since 2.0.0, and can be used to
+ revert to pre-2.0.0 behavior by setting it to false.
+</p>
+'
+);
+
HTMLPurifier_ConfigSchema::define(
- 'Core', 'RemoveInvalidImg', true, 'bool',
- 'This directive enables pre-emptive URI checking in <code>img</code> '.
- 'tags, as the attribute validation strategy is not authorized to '.
- 'remove elements from the document. This directive has been available '.
- 'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
+ 'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
+<p>
+ This directive is a lookup array of elements which should have their
+ contents removed when they are not allowed by the HTML definition.
+ For example, the contents of a <code>script</code> tag are not
+ normally shown in a document, so if script tags are to be removed,
+ their contents should be removed too. This is opposed to a <code>b</code>
+ tag, which defines some presentational changes but does not hide its
+ contents.
+</p>
+'
);
/**
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator();
$result = array();
+
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
+
+ $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
+ $hidden_elements = $config->get('Core', 'HiddenElements');
+
+ // remove script contents compatibility
+ if ($remove_script_contents === true) {
+ $hidden_elements['script'] = true;
+ } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
+ unset($hidden_elements['script']);
+ }
+
+ $attr_validator = new HTMLPurifier_AttrValidator();
+
+ // removes tokens until it reaches a closing tag with its value
+ $remove_until = false;
+
+ // converts comments into text tokens when this is equal to a tag name
+ $textify_comments = false;
+
+ $token = false;
+ $context->register('CurrentToken', $token);
+
+ $e = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+
foreach($tokens as $token) {
+ if ($remove_until) {
+ if (empty($token->is_tag) || $token->name !== $remove_until) {
+ continue;
+ }
+ }
if (!empty( $token->is_tag )) {
// DEFINITION CALL
- if (isset($definition->info[$token->name])) {
- // leave untouched, except for a few special cases:
-
- // hard-coded image special case, pre-emptively drop
- // if not available. Probably not abstract-able
- if ( $token->name == 'img' && $remove_invalid_img ) {
- if (!isset($token->attr['src'])) {
- continue;
- }
- if (!isset($definition->info['img']->attr['src'])) {
- continue;
- }
- $token->attr['src'] =
- $definition->
- info['img']->
- attr['src']->
- validate($token->attr['src'],
- $config, $context);
- if ($token->attr['src'] === false) continue;
- }
-
- } elseif (
+
+ // before any processing, try to transform the element
+ if (
isset($definition->info_tag_transform[$token->name])
) {
+ $original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
info_tag_transform[$token->name]->
transform($token, $config, $context);
+ if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
+ }
+
+ if (isset($definition->info[$token->name])) {
+
+ // mostly everything's good, but
+ // we need to make sure required attributes are in order
+ if (
+ $definition->info[$token->name]->required_attr &&
+ ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
+ ) {
+ $attr_validator->validateToken($token, $config, $context);
+ $ok = true;
+ foreach ($definition->info[$token->name]->required_attr as $name) {
+ if (!isset($token->attr[$name])) {
+ $ok = false;
+ break;
+ }
+ }
+ if (!$ok) {
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
+ continue;
+ }
+ $token->armor['ValidateAttributes'] = true;
+ }
+
+ // CAN BE GENERICIZED
+ if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
+ $textify_comments = $token->name;
+ } elseif ($token->name === $textify_comments && $token->type == 'end') {
+ $textify_comments = false;
+ }
+
} elseif ($escape_invalid_tags) {
- // invalid tag, generate HTML and insert in
+ // invalid tag, generate HTML representation and insert in
+ if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
} else {
+ // check if we need to destroy all of the tag's children
+ // CAN BE GENERICIZED
+ if (isset($hidden_elements[$token->name])) {
+ if ($token->type == 'start') {
+ $remove_until = $token->name;
+ } elseif ($token->type == 'empty') {
+ // do nothing: we're still looking
+ } else {
+ $remove_until = false;
+ }
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
+ } else {
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
+ }
continue;
}
} elseif ($token->type == 'comment') {
- // strip comments
- continue;
+ // textify comments in script tags when they are allowed
+ if ($textify_comments !== false) {
+ $data = $token->data;
+ $token = new HTMLPurifier_Token_Text($data);
+ } else {
+ // strip comments
+ if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+ continue;
+ }
} elseif ($token->type == 'text') {
} else {
continue;
}
$result[] = $token;
}
+ if ($remove_until && $e) {
+ // we removed tokens until the end, throw error
+ $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
+ }
+
+ $context->destroy('CurrentToken');
+
return $result;
}
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/IDAccumulator.php';
+require_once 'HTMLPurifier/AttrValidator.php';
+
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.');
function execute($tokens, $config, &$context) {
- $definition = $config->getHTMLDefinition();
-
// setup id_accumulator context
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
$context->register('IDAccumulator', $id_accumulator);
- // create alias to global definition array, see also $defs
- // DEFINITION CALL
- $d_defs = $definition->info_global_attr;
+ // setup validator
+ $validator = new HTMLPurifier_AttrValidator();
+
+ $token = false;
+ $context->register('CurrentToken', $token);
foreach ($tokens as $key => $token) {
// namely start and empty tags
if ($token->type !== 'start' && $token->type !== 'empty') continue;
- // copy out attributes for easy manipulation
- $attr = $token->attr;
-
- // do global transformations (pre)
- // nothing currently utilizes this
- foreach ($definition->info_attr_transform_pre as $transform) {
- $attr = $transform->transform($attr, $config, $context);
- }
-
- // do local transformations only applicable to this element (pre)
- // ex. <p align="right"> to <p style="text-align:right;">
- foreach ($definition->info[$token->name]->attr_transform_pre
- as $transform
- ) {
- $attr = $transform->transform($attr, $config, $context);
- }
+ // skip tokens that are armored
+ if (!empty($token->armor['ValidateAttributes'])) continue;
- // create alias to this element's attribute definition array, see
- // also $d_defs (global attribute definition array)
- // DEFINITION CALL
- $defs = $definition->info[$token->name]->attr;
+ // note that we have no facilities here for removing tokens
+ $validator->validateToken($token, $config, $context);
- // iterate through all the attribute keypairs
- // Watch out for name collisions: $key has previously been used
- foreach ($attr as $attr_key => $value) {
-
- // call the definition
- if ( isset($defs[$attr_key]) ) {
- // there is a local definition defined
- if ($defs[$attr_key] === false) {
- // We've explicitly been told not to allow this element.
- // This is usually when there's a global definition
- // that must be overridden.
- // Theoretically speaking, we could have a
- // AttrDef_DenyAll, but this is faster!
- $result = false;
- } else {
- // validate according to the element's definition
- $result = $defs[$attr_key]->validate(
- $value, $config, $context
- );
- }
- } elseif ( isset($d_defs[$attr_key]) ) {
- // there is a global definition defined, validate according
- // to the global definition
- $result = $d_defs[$attr_key]->validate(
- $value, $config, $context
- );
- } else {
- // system never heard of the attribute? DELETE!
- $result = false;
- }
-
- // put the results into effect
- if ($result === false || $result === null) {
- // remove the attribute
- unset($attr[$attr_key]);
- } elseif (is_string($result)) {
- // simple substitution
- $attr[$attr_key] = $result;
- }
-
- // we'd also want slightly more complicated substitution
- // involving an array as the return value,
- // although we're not sure how colliding attributes would
- // resolve (certain ones would be completely overriden,
- // others would prepend themselves).
- }
-
- // post transforms
-
- // ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
- foreach ($definition->info_attr_transform_post as $transform) {
- $attr = $transform->transform($attr, $config, $context);
- }
-
- // ex. <bdo> to <bdo dir="ltr">
- foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
- $attr = $transform->transform($attr, $config, $context);
- }
-
- // commit changes
- // could interfere with flyweight implementation
- $tokens[$key]->attr = $attr;
+ $tokens[$key] = $token; // for PHP 4
}
+
$context->destroy('IDAccumulator');
+ $context->destroy('CurrentToken');
return $tokens;
}
}
-?>
\ No newline at end of file
trigger_error('Call to abstract function', E_USER_ERROR);
}
+    /**
+     * Puts the given CSS in front of whatever the style attribute
+     * currently holds. A missing style attribute is treated as an
+     * empty one, so it ends up created with just the new CSS.
+     * @warning Copied over from AttrTransform, be sure to keep in sync
+     * @param $attr Attribute array to modify in place (passed by reference)
+     * @param $css CSS text to place at the front of the style attribute
+     */
+    function prependCSS(&$attr, $css) {
+        if (!isset($attr['style'])) {
+            // start from an empty declaration list
+            $attr['style'] = '';
+        }
+        $attr['style'] = $css . $attr['style'];
+    }
+
}
-?>
\ No newline at end of file
+++ /dev/null
-<?php
-
-require_once 'HTMLPurifier/TagTransform.php';
-
-/**
- * Transforms CENTER tags into proper version (DIV with text-align CSS)
- *
- * Takes a CENTER tag, parses the align attribute, and then if it's valid
- * assigns it to the CSS property text-align.
- */
-class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
-{
- var $transform_to = 'div';
-
- function transform($tag, $config, &$context) {
- if ($tag->type == 'end') {
- $new_tag = new HTMLPurifier_Token_End($this->transform_to);
- return $new_tag;
- }
- $attr = $tag->attr;
- $prepend_css = 'text-align:center;';
- if (isset($attr['style'])) {
- $attr['style'] = $prepend_css . $attr['style'];
- } else {
- $attr['style'] = $prepend_css;
- }
- $new_tag = $tag->copy();
- $new_tag->name = $this->transform_to;
- $new_tag->attr = $attr;
- return $new_tag;
- }
-}
-
-?>
\ No newline at end of file
function transform($tag, $config, &$context) {
if ($tag->type == 'end') {
- $new_tag = new HTMLPurifier_Token_End($this->transform_to);
+ $new_tag = $tag->copy();
+ $new_tag->name = $this->transform_to;
return $new_tag;
}
}
}
-?>
\ No newline at end of file
require_once 'HTMLPurifier/TagTransform.php';
/**
- * Simple transformation, just change tag name to something else.
+ * Simple transformation, just change tag name to something else,
+ * and possibly add some styling. This will cover most of the deprecated
+ * tag cases.
*/
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{
+ /**
+ * CSS to prepend to the style attribute of transformed start and
+ * empty tags; null (the constructor default) means no CSS is added.
+ */
+ var $style;
+
/**
* @param $transform_to Tag name to transform to.
+ * @param $style CSS style to add to the tag
*/
- function HTMLPurifier_TagTransform_Simple($transform_to) {
+ function HTMLPurifier_TagTransform_Simple($transform_to, $style = null) {
$this->transform_to = $transform_to;
+ $this->style = $style;
}
+ /**
+ * Returns a copy of $tag renamed to $this->transform_to. When a style
+ * was configured, it is prepended to the copy's style attribute, but
+ * only for start and empty tags.
+ * @param $tag Token to transform; it is copied, not modified
+ * @param $config Not referenced by this method
+ * @param $context Not referenced by this method
+ * @return The renamed copy of the token
+ */
function transform($tag, $config, &$context) {
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
+ if (!is_null($this->style) &&
+ ($new_tag->type == 'start' || $new_tag->type == 'empty')
+ ) {
+ $this->prependCSS($new_tag->attr, $this->style);
+ }
return $new_tag;
}
}
-?>
\ No newline at end of file
*/
class HTMLPurifier_Token {
var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
+ var $line; /**< Line number node was on in source document. Null if unknown. @public */
+
+ /**
+ * Lookup array of processing that this token is exempt from.
+ * Currently, valid values are "ValidateAttributes" and
+ * "MakeWellFormed_TagClosedError"
+ */
+ var $armor = array();
/**
* Copies the tag into a new one (clone substitute).
* @return Copied token
*/
function copy() {
- trigger_error('Cannot copy abstract class', E_USER_ERROR);
+ // round-trip through serialize() to build the copy; every property,
+ // including $line and $armor, is carried over to the new object
+ return unserialize(serialize($this));
}
}
* @param $name String name.
* @param $attr Associative array of attributes.
*/
- function HTMLPurifier_Token_Tag($name, $attr = array()) {
+ function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
}
}
$this->attr = $attr;
+ $this->line = $line;
}
}
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
- function copy() {
- return new HTMLPurifier_Token_Start($this->name, $this->attr);
- }
}
/**
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
- function copy() {
- return new HTMLPurifier_Token_Empty($this->name, $this->attr);
- }
}
/**
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
var $type = 'end';
- function copy() {
- return new HTMLPurifier_Token_End($this->name);
- }
}
/**
*
* @param $data String parsed character data.
*/
- function HTMLPurifier_Token_Text($data) {
+ function HTMLPurifier_Token_Text($data, $line = null) {
$this->data = $data;
$this->is_whitespace = ctype_space($data);
- }
- function copy() {
- return new HTMLPurifier_Token_Text($this->data);
+ $this->line = $line;
}
}
*
* @param $data String comment data.
*/
- function HTMLPurifier_Token_Comment($data) {
+ function HTMLPurifier_Token_Comment($data, $line = null) {
$this->data = $data;
- }
- function copy() {
- return new HTMLPurifier_Token_Comment($this->data);
+ $this->line = $line;
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/URIParser.php';
+require_once 'HTMLPurifier/URIFilter.php';
+
+/**
+ * HTML Purifier's internal representation of a URI
+ */
+class HTMLPurifier_URI
+{
+
+    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
+
+    /**
+     * Stores the seven URI components.
+     * @note The scheme is lowercased when it is not already lowercase,
+     *       and a non-null port is cast to an integer
+     */
+    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
+        if (!is_null($scheme) && !ctype_lower($scheme)) {
+            $scheme = strtolower($scheme);
+        }
+        if (!is_null($port)) {
+            $port = (int) $port;
+        }
+        $this->scheme   = $scheme;
+        $this->userinfo = $userinfo;
+        $this->host     = $host;
+        $this->port     = $port;
+        $this->path     = $path;
+        $this->query    = $query;
+        $this->fragment = $fragment;
+    }
+
+    /**
+     * Looks up the scheme object for this URI's scheme, or for the URI
+     * definition's default scheme when the URI has none
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Scheme object appropriate for validating this URI, or
+     *         false when no scheme object could be obtained
+     */
+    function getSchemeObj($config, &$context) {
+        $registry =& HTMLPurifier_URISchemeRegistry::instance();
+
+        if ($this->scheme === null) {
+            // no scheme on the URI: fall back to the default one
+            $def = $config->getDefinition('URI');
+            $default_obj = $registry->getScheme($def->defaultScheme, $config, $context);
+            if ($default_obj) {
+                return $default_obj;
+            }
+            // the default scheme itself could not be loaded
+            trigger_error(
+                'Default scheme object "' . $def->defaultScheme . '" was not readable',
+                E_USER_WARNING
+            );
+            return false;
+        }
+
+        $explicit_obj = $registry->getScheme($this->scheme, $config, $context);
+        if (!$explicit_obj) {
+            // invalid scheme, clean it out
+            return false;
+        }
+        return $explicit_obj;
+    }
+
+    /**
+     * Scheme-independent validation: cleans up host and port in place
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return True if validation/filtering succeeds, false if failure
+     */
+    function validate($config, &$context) {
+
+        // host: keep the validated form, drop it entirely if rejected
+        if ($this->host !== null) {
+            $host_def  = new HTMLPurifier_AttrDef_URI_Host();
+            $validated = $host_def->validate($this->host, $config, $context);
+            $this->host = ($validated === false) ? null : $validated;
+        }
+
+        // port: anything outside 1..65535 is dropped
+        if ($this->port !== null && ($this->port < 1 || $this->port > 65535)) {
+            $this->port = null;
+        }
+
+        // query and fragment share a simple grammar,
+        // *( pchar / "/" / "?" ); their validation is left until
+        // percent encoding gets fixed
+
+        // path rules vary with the authority and the scheme; they are
+        // not checked here since they are not that important and would
+        // not apply to everyone
+
+        return true;
+
+    }
+
+    /**
+     * Serializes the components back into a URI string
+     * @return String URI appropriate for output
+     */
+    function toString() {
+        $result = '';
+
+        if ($this->scheme !== null) {
+            $result .= $this->scheme . ':';
+        }
+
+        // the authority (userinfo, host, port) is only emitted when
+        // there is a host
+        if ($this->host !== null) {
+            $result .= '//';
+            if ($this->userinfo !== null) {
+                $result .= $this->userinfo . '@';
+            }
+            $result .= $this->host;
+            if ($this->port !== null) {
+                $result .= ':' . $this->port;
+            }
+        }
+
+        $result .= $this->path;
+
+        if ($this->query !== null) {
+            $result .= '?' . $this->query;
+        }
+        if ($this->fragment !== null) {
+            $result .= '#' . $this->fragment;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Produces a copy of this URI object via a serialize round-trip
+     */
+    function copy() {
+        $frozen = serialize($this);
+        return unserialize($frozen);
+    }
+
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/Definition.php';
+require_once 'HTMLPurifier/URIFilter.php';
+require_once 'HTMLPurifier/URIParser.php';
+
+require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
+require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
+require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
+require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
+
+// directives identifying a custom-built URI definition
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'DefinitionID', null, 'string/null', '
+<p>
+ Unique identifier for a custom-built URI definition. If you want
+ to add custom URIFilters, you must specify this value.
+ This directive has been available since 2.1.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'DefinitionRev', 1, 'int', '
+<p>
+ Revision identifier for your custom definition. See
+ %HTML.DefinitionRev for details. This directive has been available
+ since 2.1.0.
+</p>
+');
+
+// informative URI directives
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'DefaultScheme', 'http', 'string', '
+<p>
+ Defines through what scheme the output will be served, in order to
+ select the proper object validator when no scheme information is present.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'Host', null, 'string/null', '
+<p>
+ Defines the domain name of the server, so we can determine whether
+ an absolute URI is from your website or not. Not strictly necessary,
+ as users should be using relative URIs to reference resources on your
+ website. It will, however, let you use absolute URIs to link to
+ subdomains of the domain you post here: i.e. example.com will allow
+ sub.example.com. However, higher up domains will still be excluded:
+ if you set %URI.Host to sub.example.com, example.com will be blocked.
+ <strong>Note:</strong> This directive overrides %URI.Base because
+ a given page may be on a sub-domain, but you wish HTML Purifier to be
+ more relaxed and allow some of the parent domains too.
+ This directive has been available since 1.2.0.
+</p>
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'Base', null, 'string/null', '
+<p>
+ The base URI is the URI of the document this purified HTML will be
+ inserted into. This information is important if HTML Purifier needs
+ to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
+ is on. You may use a non-absolute URI for this value, but behavior
+ may vary (%URI.MakeAbsolute deals nicely with both absolute and
+ relative paths, but forwards-compatibility is not guaranteed).
+ <strong>Warning:</strong> If set, the scheme on this URI
+ overrides the one specified by %URI.DefaultScheme. This directive has
+ been available since 2.1.0.
+</p>
+');
+
+/**
+ * Definition object of type 'URI': holds the base URI, home host and
+ * default scheme derived from the %URI.* directives, plus the set of
+ * URI filters that were switched on by configuration.
+ */
+class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
+{
+
+ var $type = 'URI';
+ // filters that are active, keyed by filter name (filled by addFilter())
+ var $filters = array();
+ // filters that are available, keyed by filter name; unset once
+ // setupFilters() has run
+ var $registeredFilters = array();
+
+ /**
+ * HTMLPurifier_URI object of the base specified at %URI.Base
+ */
+ var $base;
+
+ /**
+ * String host to consider "home" base
+ */
+ var $host;
+
+ /**
+ * Name of default scheme based on %URI.DefaultScheme and %URI.Base
+ */
+ var $defaultScheme;
+
+ /**
+ * Registers the four filters whose files are required at the top of
+ * this file; none of them is active until setupFilters() adds it.
+ */
+ function HTMLPurifier_URIDefinition() {
+ $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
+ $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
+ $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
+ $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
+ }
+
+ /**
+ * Makes a filter available under its $name without activating it.
+ * @param $filter Filter object exposing a name property
+ */
+ function registerFilter($filter) {
+ $this->registeredFilters[$filter->name] = $filter;
+ }
+
+ /**
+ * Activates a filter: calls its prepare() with the configuration,
+ * then stores it in $this->filters under its $name.
+ * @param $filter Filter object to activate
+ * @param $config Configuration handed to the filter's prepare()
+ */
+ function addFilter($filter, $config) {
+ $filter->prepare($config);
+ $this->filters[$filter->name] = $filter;
+ }
+
+ /**
+ * Populates the definition: member variables first, then filters.
+ * @param $config Configuration to read the %URI.* directives from
+ */
+ function doSetup($config) {
+ $this->setupMemberVariables($config);
+ $this->setupFilters($config);
+ }
+
+ /**
+ * Activates every registered filter whose same-named %URI directive
+ * is neither false nor null, then discards the registration list.
+ * @param $config Configuration to read the directives from
+ */
+ function setupFilters($config) {
+ foreach ($this->registeredFilters as $name => $filter) {
+ $conf = $config->get('URI', $name);
+ if ($conf !== false && $conf !== null) {
+ $this->addFilter($filter, $config);
+ }
+ }
+ unset($this->registeredFilters);
+ }
+
+ /**
+ * Fills $host, $base and $defaultScheme from %URI.Host, %URI.Base and
+ * %URI.DefaultScheme. When a base is configured, its scheme becomes
+ * the default scheme and its host is used if %URI.Host is null;
+ * %URI.DefaultScheme applies only if the default scheme is still null.
+ * @param $config Configuration to read the directives from
+ */
+ function setupMemberVariables($config) {
+ $this->host = $config->get('URI', 'Host');
+ $base_uri = $config->get('URI', 'Base');
+ if (!is_null($base_uri)) {
+ $parser = new HTMLPurifier_URIParser();
+ $this->base = $parser->parse($base_uri);
+ $this->defaultScheme = $this->base->scheme;
+ if (is_null($this->host)) $this->host = $this->base->host;
+ }
+ if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme');
+ }
+
+ /**
+ * Runs the URI through every active filter in turn.
+ * @param $uri Reference to the URI object; passed on to each filter
+ * @param $config Configuration passed on to each filter
+ * @param $context Context passed on to each filter
+ * @return False as soon as one filter returns a falsy value, true otherwise
+ */
+ function filter(&$uri, $config, &$context) {
+ // NOTE(review): the filter is looked up by key rather than taken from
+ // the loop variable, presumably so the stored object itself (not a
+ // by-value loop copy) is invoked -- confirm
+ foreach ($this->filters as $name => $x) {
+ $result = $this->filters[$name]->filter($uri, $config, $context);
+ if (!$result) return false;
+ }
+ return true;
+ }
+
+}
--- /dev/null
+<?php
+
+/**
+ * Base class for chainable filters that do custom URI processing.
+ * Subclasses set $name and override filter(); prepare() is optional.
+ */
+class HTMLPurifier_URIFilter
+{
+    var $name;
+
+    /**
+     * Initialization hook for the filter; does nothing in this
+     * base class
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function prepare($config)
+    {
+    }
+
+    /**
+     * Filters a URI object; this base implementation only raises an
+     * error, since it is meant to be overridden
+     * @param &$uri Reference to URI object
+     * @param $config Instance of HTMLPurifier_Config
+     * @param &$context Instance of HTMLPurifier_Context
+     */
+    function filter(&$uri, $config, &$context)
+    {
+        trigger_error('Cannot call abstract function', E_USER_ERROR);
+    }
+}
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/URIFilter.php';
+
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DisableExternal', false, 'bool',
+    'Disables links to external websites. This is a highly effective '.
+    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '.
+    'links or images outside of your domain will be allowed. Non-linkified '.
+    'URIs will still be preserved. If you want to be able to link to '.
+    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
+    'This directive has been available since 1.2.0.'
+);
+
+/**
+ * URI filter that rejects every URI whose host is not %URI.Host or one
+ * of its subdomains. URIs without a host are always accepted.
+ */
+class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
+{
+    var $name = 'DisableExternal';
+
+    /**
+     * Dot-separated labels of %URI.Host in reverse order (top-level
+     * label first), or false when %URI.Host is not set
+     */
+    var $ourHostParts = false;
+
+    /**
+     * Reads %URI.Host and caches its reversed labels.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function prepare($config) {
+        $our_host = $config->get('URI', 'Host');
+        if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
+    }
+
+    /**
+     * @param &$uri Reference to URI object; only its host is read
+     * @param $config Not referenced by this method
+     * @param &$context Not referenced by this method
+     * @return True when the URI has no host, or when every label of our
+     *         host matches the corresponding trailing label of the URI's
+     *         host; false otherwise, including when no host is configured
+     */
+    function filter(&$uri, $config, &$context) {
+        if (is_null($uri->host)) return true;
+        if ($this->ourHostParts === false) return false;
+        $host_parts = array_reverse(explode('.', $uri->host));
+        foreach ($this->ourHostParts as $i => $our_part) {
+            if (!isset($host_parts[$i])) return false;
+            // strict comparison: with != two numeric-looking labels such
+            // as "1e3" and "1000" (or "10" and "010") compare equal, which
+            // would let a foreign host pass as one of ours
+            if ($host_parts[$i] !== $our_part) return false;
+        }
+        return true;
+    }
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
+
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DisableExternalResources', false, 'bool',
+    'Disables the embedding of external resources, preventing users from '.
+    'embedding things like images from other hosts. This prevents '.
+    'access tracking (good for email viewers), bandwidth leeching, '.
+    'cross-site request forging, goatse.cx posting, and '.
+    'other nasties, but also results in '.
+    'a loss of end-user functionality (they can\'t directly post a pic '.
+    'they posted from Flickr anymore). Use it if you don\'t have a '.
+    'robust user-content moderation team. This directive has been '.
+    'available since 1.3.0.'
+);
+
+/**
+ * Variant of the DisableExternal filter that is applied only while the
+ * context's EmbeddedURI value is truthy.
+ */
+class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
+{
+    var $name = 'DisableExternalResources';
+
+    /**
+     * Delegates to the parent's host check when the context's
+     * EmbeddedURI value is truthy; every other URI is accepted.
+     */
+    function filter(&$uri, $config, &$context) {
+        $is_embedded = $context->get('EmbeddedURI', true);
+        if ($is_embedded) {
+            return parent::filter($uri, $config, $context);
+        }
+        return true;
+    }
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/URIFilter.php';
+
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'HostBlacklist', array(), 'list',
+    'List of strings that are forbidden in the host of any URI. Use it to '.
+    'kill domain names of spam, etc. Note that it will catch anything in '.
+    'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
+    'This directive has been available since 1.3.0.'
+);
+
+/**
+ * URI filter that rejects any URI whose host contains one of the
+ * fragments listed in %URI.HostBlacklist.
+ */
+class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
+{
+    var $name = 'HostBlacklist';
+
+    /** Forbidden host fragments, loaded from %URI.HostBlacklist by prepare() */
+    var $blacklist = array();
+
+    /**
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function prepare($config) {
+        $this->blacklist = $config->get('URI', 'HostBlacklist');
+    }
+
+    /**
+     * @param &$uri Reference to URI object; only its host is read
+     * @param $config Not referenced by this method
+     * @param &$context Not referenced by this method
+     * @return False when the host contains a blacklisted fragment, true
+     *         otherwise (including when the URI has no host)
+     */
+    function filter(&$uri, $config, &$context) {
+        // a URI without a host has nothing to match against; return early
+        // instead of handing null to strpos(), as DisableExternal does
+        if (is_null($uri->host)) return true;
+        foreach ($this->blacklist as $blacklisted_host_fragment) {
+            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
--- /dev/null
+<?php
+
+// does not support network paths
+
+require_once 'HTMLPurifier/URIFilter.php';
+
+// Declares the URI.MakeAbsolute directive (type bool, default false); the
+// multi-line string is the directive's description text.
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'MakeAbsolute', false, 'bool', '
+<p>
+ Converts all URIs into absolute forms. This is useful when the HTML
+ being filtered assumes a specific base path, but will actually be
+ viewed in a different context (and setting an alternate base URI is
+ not possible). %URI.Base must be set for this directive to work.
+ This directive has been available since 2.1.0.
+</p>
+');
+
+/**
+ * URI filter that rewrites relative URIs into absolute ones by resolving
+ * them against the base URI found in the URI definition.
+ * URIs that already carry a host are left untouched.
+ */
+class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
+{
+    /** Identifier of this filter */
+    var $name = 'MakeAbsolute';
+    /** Base URI object taken from the URI definition; null when unset */
+    var $base;
+    /** Collapsed path segments of the base URI, without its last segment */
+    var $basePathStack = array();
+
+    /**
+     * Reads the base URI from the URI definition and pre-computes the
+     * segment stack of its path.
+     * @param $config HTMLPurifier_Config object
+     */
+    function prepare($config) {
+        $def = $config->getDefinition('URI');
+        $this->base = $def->base;
+        if (is_null($this->base)) {
+            // The directive is merely skipped (filter() aborts early when
+            // there is no base), so report a warning: E_USER_ERROR would
+            // halt the script under the default error handler.
+            trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING);
+            return;
+        }
+        $this->base->fragment = null; // fragment is invalid for base URI
+        $stack = explode('/', $this->base->path);
+        array_pop($stack); // discard last segment
+        $stack = $this->_collapseStack($stack); // do pre-parsing
+        $this->basePathStack = $stack;
+    }
+
+    /**
+     * Makes $uri absolute, modifying it in place.
+     * @param $uri URI object to rewrite, passed by reference
+     * @param $config HTMLPurifier_Config object
+     * @param $context HTMLPurifier_Context object
+     * @return Always true: this filter never rejects a URI
+     */
+    function filter(&$uri, $config, &$context) {
+        if (is_null($this->base)) return true; // abort early
+        if (
+            $uri->path === '' && is_null($uri->scheme) &&
+            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
+        ) {
+            // reference to current document
+            $uri = $this->base->copy();
+            return true;
+        }
+        if (!is_null($uri->scheme)) {
+            // absolute URI already: don't change
+            if (!is_null($uri->host)) return true;
+            // NOTE(review): assumes getSchemeObj() always yields an object
+            // here - confirm against HTMLPurifier_URI
+            $scheme_obj = $uri->getSchemeObj($config, $context);
+            if (!$scheme_obj->hierarchical) {
+                // non-hierarchal URI with explicit scheme, don't change
+                return true;
+            }
+            // special case: had a scheme but always is hierarchical and had no authority
+        }
+        if (!is_null($uri->host)) {
+            // network path, don't bother
+            return true;
+        }
+        if ($uri->path === '') {
+            $uri->path = $this->base->path;
+        } elseif ($uri->path[0] !== '/') {
+            // relative path, needs more complicated processing: append its
+            // segments to the base directory and resolve the dots
+            $stack = explode('/', $uri->path);
+            $new_stack = array_merge($this->basePathStack, $stack);
+            $new_stack = $this->_collapseStack($new_stack);
+            $uri->path = implode('/', $new_stack);
+        }
+        // re-combine: the scheme always comes from the base, the authority
+        // parts only where the URI has none of its own
+        $uri->scheme = $this->base->scheme;
+        if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
+        if (is_null($uri->host)) $uri->host = $this->base->host;
+        if (is_null($uri->port)) $uri->port = $this->base->port;
+        return true;
+    }
+
+    /**
+     * Resolve dots and double-dots in a path stack
+     * @param $stack List of path segments, as produced by explode('/', ...)
+     * @return List of segments with '.' and '..' resolved; an empty
+     *         segment is appended when the last segment was '.' or '..',
+     *         so that imploding the result ends in a slash
+     * @private
+     */
+    function _collapseStack($stack) {
+        $result = array();
+        // must be defined even when $stack is empty and the loop never runs
+        $is_folder = false;
+        for ($i = 0; isset($stack[$i]); $i++) {
+            $is_folder = false;
+            // absorb an internally duplicated slash
+            if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue;
+            if ($stack[$i] == '..') {
+                if (!empty($result)) {
+                    $segment = array_pop($result);
+                    if ($segment === '' && empty($result)) {
+                        // error case: attempted to back out too far:
+                        // restore the leading slash
+                        $result[] = '';
+                    } elseif ($segment === '..') {
+                        $result[] = '..'; // cannot remove .. with ..
+                    }
+                } else {
+                    // relative path, preserve the double-dots
+                    $result[] = '..';
+                }
+                $is_folder = true;
+                continue;
+            }
+            if ($stack[$i] == '.') {
+                // silently absorb
+                $is_folder = true;
+                continue;
+            }
+            $result[] = $stack[$i];
+        }
+        if ($is_folder) $result[] = '';
+        return $result;
+    }
+}
+
--- /dev/null
+<?php
+
+require_once 'HTMLPurifier/URI.php';
+
+/**
+ * Parses a URI into the components and fragment identifier as specified
+ * by RFC 2396.
+ * @todo Replace regexps with a native PHP parser
+ */
+class HTMLPurifier_URIParser
+{
+
+    /**
+     * Splits a URI string into scheme, authority (userinfo, host, port),
+     * path, query and fragment.
+     * @param $uri string URI to parse
+     * @return HTMLPurifier_URI built from the components, or false when
+     *         preg_match() reports no match for the overall pattern
+     */
+    function parse($uri) {
+        $r_URI = '!'.
+            '(([^:/?#<>\'"]+):)?'. // 2. Scheme
+            '(//([^/?#<>\'"]*))?'. // 4. Authority
+            '([^?#<>\'"]*)'. // 5. Path
+            '(\?([^#<>\'"]*))?'. // 7. Query
+            '(#([^<>\'"]*))?'. // 8. Fragment
+            '!';
+
+        $parts = array();
+        if (!preg_match($r_URI, $uri, $parts)) {
+            return false; // *really* invalid URI
+        }
+
+        // separate out parts: an optional component stays null unless its
+        // delimiter-bearing group captured something
+        $scheme = null;
+        $authority = null;
+        $query = null;
+        $fragment = null;
+        if (!empty($parts[1])) $scheme = $parts[2];
+        if (!empty($parts[3])) $authority = $parts[4];
+        $path = $parts[5]; // always present, can be empty
+        if (!empty($parts[6])) $query = $parts[7];
+        if (!empty($parts[8])) $fragment = $parts[9];
+
+        // without an authority all three of its sub-components are null
+        $userinfo = null;
+        $host = null;
+        $port = null;
+
+        if ($authority !== null) {
+            // further parse authority; the pattern is assembled from
+            // smaller pieces, which makes it costly
+            $HEXDIG = '[A-Fa-f0-9]';
+            $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
+            $sub_delims = '!$&\'()'; // needs []
+            $pct_encoded = "%$HEXDIG$HEXDIG";
+            $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
+            $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+
+            $auth = array();
+            preg_match($r_authority, $authority, $auth);
+
+            if (!empty($auth[1])) $userinfo = $auth[2];
+            // an authority was present, so the host is at least ''
+            $host = '';
+            if (!empty($auth[3])) $host = $auth[3];
+            if (!empty($auth[4])) $port = (int) $auth[5];
+        }
+
+        return new HTMLPurifier_URI(
+            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
+    }
+
+}
+
*/
var $browsable = false;
+ /**
+ * Whether or not the URI always uses <hier_part>, resolves edge cases
+ * with making relative URIs absolute
+ */
+ var $hierarchical = false;
+
/**
* Validates the components of a URI
* @note This implementation should be called by children if they define
* a default port, as it does port processing.
- * @note Fragment is omitted as that is scheme independent
- * @param $userinfo User info found before at sign in authority
- * @param $host Hostname in authority
- * @param $port Port found after colon in authority
- * @param $path Path of URI
- * @param $query Query of URI, found after question mark
+ * @param $uri Instance of HTMLPurifier_URI
* @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object
+ * @return Bool success or failure
*/
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- if ($this->default_port == $port) $port = null;
- return array($userinfo, $host, $port, $path, $query);
+ function validate(&$uri, $config, &$context) {
+ // a port equal to this scheme's default port is redundant: clear it
+ if ($this->default_port == $uri->port) $uri->port = null;
+ return true;
}
}
-?>
\ No newline at end of file
var $default_port = 21;
var $browsable = true; // usually
+ var $hierarchical = true;
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- list($userinfo, $host, $port, $path, $query) =
- parent::validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context );
- $semicolon_pos = strrpos($path, ';'); // reverse
+ function validate(&$uri, $config, &$context) {
+ parent::validate($uri, $config, $context);
+ // any query component is always discarded for this scheme
+ $uri->query = null;
+
+ // typecode check
+ // everything after the LAST semicolon of the path is the candidate typecode
+ $semicolon_pos = strrpos($uri->path, ';'); // reverse
if ($semicolon_pos !== false) {
- // typecode check
- $type = substr($path, $semicolon_pos + 1); // no semicolon
- $path = substr($path, 0, $semicolon_pos);
+ $type = substr($uri->path, $semicolon_pos + 1); // no semicolon
+ $uri->path = substr($uri->path, 0, $semicolon_pos);
$type_ret = '';
if (strpos($type, '=') !== false) {
// figure out whether or not the declaration is correct
list($key, $typecode) = explode('=', $type, 2);
if ($key !== 'type') {
// invalid key, tack it back on encoded
- $path .= '%3B' . $type;
+ $uri->path .= '%3B' . $type;
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
$type_ret = ";type=$typecode";
}
} else {
- $path .= '%3B' . $type;
+ $uri->path .= '%3B' . $type;
}
- $path = str_replace(';', '%3B', $path);
- $path .= $type_ret;
+ // encode every semicolon still in the path, then re-attach the
+ // typecode, which is empty unless it was type=a, type=i or type=d
+ $uri->path = str_replace(';', '%3B', $uri->path);
+ $uri->path .= $type_ret;
}
- return array($userinfo, $host, $port, $path, null);
+
+ return true;
}
}
-?>
\ No newline at end of file
var $default_port = 80;
var $browsable = true;
+ var $hierarchical = true;
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- list($userinfo, $host, $port, $path, $query) =
- parent::validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context );
- return array(null, $host, $port, $path, $query);
+ function validate(&$uri, $config, &$context) {
+ // run the parent's validation first
+ parent::validate($uri, $config, $context);
+ // any userinfo component is always discarded for this scheme
+ $uri->userinfo = null;
+ return true;
}
}
-?>
\ No newline at end of file
}
-?>
\ No newline at end of file
var $browsable = false;
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- list($userinfo, $host, $port, $path, $query) =
- parent::validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context );
+ function validate(&$uri, $config, &$context) {
+ parent::validate($uri, $config, $context);
+ // userinfo, host and port are cleared; path and query are left as they are
+ $uri->userinfo = null;
+ $uri->host = null;
+ $uri->port = null;
// we need to validate path against RFC 2368's addr-spec
- return array(null, null, null, $path, $query);
+ return true;
}
}
-?>
\ No newline at end of file
var $browsable = false;
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- list($userinfo, $host, $port, $path, $query) =
- parent::validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context );
+ function validate(&$uri, $config, &$context) {
+ parent::validate($uri, $config, $context);
+ // userinfo, host, port and query are cleared; only the path is left as it is
+ $uri->userinfo = null;
+ $uri->host = null;
+ $uri->port = null;
+ $uri->query = null;
// typecode check needed on path
- return array(null, null, null, $path, null);
+ return true;
}
}
-?>
\ No newline at end of file
var $default_port = 119;
var $browsable = false;
- function validateComponents(
- $userinfo, $host, $port, $path, $query, $config, &$context
- ) {
- list($userinfo, $host, $port, $path, $query) =
- parent::validateComponents(
- $userinfo, $host, $port, $path, $query, $config, $context );
- return array(null, $host, $port, $path, null);
+ function validate(&$uri, $config, &$context) {
+ parent::validate($uri, $config, $context);
+ // userinfo and query are always discarded for this scheme
+ $uri->userinfo = null;
+ $uri->query = null;
+ return true;
}
}
-?>
\ No newline at end of file
}
if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
- if (empty($this->_dir)) $this->_dir = dirname(__FILE__) . '/URIScheme/';
+ if (empty($this->_dir)) $this->_dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/URIScheme/';
if (!isset($allowed_schemes[$scheme])) return $null;
- @include_once $this->_dir . $scheme . '.php';
+ // this bit of reflection is not very efficient, and a bit
+ // hacky too
$class = 'HTMLPurifier_URIScheme_' . $scheme;
+ if (!class_exists($class)) include_once $this->_dir . $scheme . '.php';
if (!class_exists($class)) return $null;
$this->schemes[$scheme] = new $class();
return $this->schemes[$scheme];
}
-?>
+
-Description of HTML Purifier v1.6.1 library import into Moodle
+Description of HTML Purifier v2.1.1 Lite library import into Moodle
Changes:
- * Text.php - added nolink, tex and algebra tags
+ * Text.php - added nolink, tex, lang and algebra tags
skodak
static $purifier = false;
if (!$purifier) {
+ make_upload_directory('cache/htmlpurifier', false);
require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.auto.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('Core', 'AcceptFullDocuments', false);
- //$config->set('HTML', 'Strict', true);
+ $config->set('Core', 'Encoding', 'UTF-8');
+ $config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
+ $config->set('Cache', 'SerializerPath', $CFG->dataroot.'/cache/htmlpurifier');
$config->set('URI', 'AllowedSchemes', array('http'=>1, 'https'=>1, 'ftp'=>1, 'irc'=>1, 'nntp'=>1, 'news'=>1, 'rtsp'=>1, 'teamspeak'=>1, 'gopher'=>1, 'mms'=>1));
$purifier = new HTMLPurifier($config);
}