*/
/*
- HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
+ HTML Purifier 2.1.4 - Standards Compliant HTML Filtering
Copyright (C) 2006-2007 Edward Z. Yang
This library is free software; you can redistribute it and/or
class HTMLPurifier
{
- var $version = '2.1.3';
+ var $version = '2.1.4';
var $config;
var $filters = array();
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with.
*/
- function &getInstance($prototype = null) {
+ function &instance($prototype = null) {
static $htmlpurifier;
if (!$htmlpurifier || $prototype) {
if (is_a($prototype, 'HTMLPurifier')) {
return $htmlpurifier;
}
+ /**
+ * Backwards-compatible alias of instance(): forwards $prototype to
+ * HTMLPurifier::instance() unchanged and returns its result.
+ * @param $prototype Optional prototype HTMLPurifier instance, passed
+ *        straight through to instance()
+ */
+ function &getInstance($prototype = null) {
+ return HTMLPurifier::instance($prototype);
+ }
}
return $this;
}
+ /**
+ * Collapses the whitespace around the commas of rgb() color triplets,
+ * e.g. "rgb(0, 0, 0)" becomes "rgb(0,0,0)", so that code which splits a
+ * shorthand CSS value on spaces sees each color as a single piece.
+ * THIS IS A HACK! Only triplets written purely with digits are rewritten,
+ * and whitespace directly inside the parentheses is not handled; any
+ * other text is returned untouched.
+ * @param $string CSS value that may contain rgb() declarations
+ * @return The value with every matching rgb() triplet compacted
+ */
+ function mungeRgb($string) {
+     $spaced_triplet  = '/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/';
+     $compact_triplet = 'rgb(\1,\2,\3)';
+     $munged = preg_replace($spaced_triplet, $compact_triplet, $string);
+     return $munged;
+ }
+
}
list($property, $value) = explode(':', $declaration, 2);
$property = trim($property);
$value = trim($value);
- if (!isset($definition->info[$property])) continue;
+ $ok = false;
+ do {
+ if (isset($definition->info[$property])) {
+ $ok = true;
+ break;
+ }
+ if (ctype_lower($property)) break;
+ $property = strtolower($property);
+ if (isset($definition->info[$property])) {
+ $ok = true;
+ break;
+ }
+ } while(0);
+ if (!$ok) continue;
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)
$string = $this->parseCDATA($string);
if ($string === '') return false;
+ // munge rgb() decl if necessary
+ $string = $this->mungeRgb($string);
+
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
function validate($string, $config, &$context) {
$string = $this->parseCDATA($string);
- // we specifically will not support rgb() syntax with spaces
+ $string = $this->mungeRgb($string);
$bits = explode(' ', $string);
$done = array(); // segments we've finished
$ret = ''; // return value
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
$color = trim($color);
- if (!$color) return false;
+ if ($color === '') return false;
$lower = strtolower($color);
if (isset($colors[$lower])) return $colors[$lower];
- if ($color[0] === '#') {
- // hexadecimal handling
- $hex = substr($color, 1);
- $length = strlen($hex);
- if ($length !== 3 && $length !== 6) return false;
- if (!ctype_xdigit($hex)) return false;
- } else {
+ if (strpos($color, 'rgb(') !== false) {
// rgb literal handling
- if (strpos($color, 'rgb(')) return false;
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) return false;
$triad = substr($color, 4, $length - 4 - 1);
}
$new_triad = implode(',', $new_parts);
$color = "rgb($new_triad)";
+ } else {
+ // hexadecimal handling
+ if ($color[0] === '#') {
+ $hex = substr($color, 1);
+ } else {
+ $hex = $color;
+ $color = '#' . $color;
+ }
+ $length = strlen($hex);
+ if ($length !== 3 && $length !== 6) return false;
+ if (!ctype_xdigit($hex)) return false;
}
return $color;
--- /dev/null
+<?php
+
+/**
+ * Decorator which enables CSS properties to be disabled for specific elements.
+ *
+ * Wraps another attribute definition: a value is rejected outright when the
+ * token currently being processed is the denied element, and is handed to the
+ * wrapped definition in every other case.
+ */
+class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
+{
+    /**
+     * Wrapped definition that performs the actual validation.
+     */
+    var $def;
+
+    /**
+     * Name of the element on which the wrapped property is denied.
+     */
+    var $element;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $element Element to deny
+     */
+    function HTMLPurifier_AttrDef_CSS_DenyElementDecorator(&$def, $element) {
+        $this->def =& $def;
+        $this->element = $element;
+    }
+
+    /**
+     * Checks if CurrentToken is set and equal to $this->element; if so the
+     * value is refused, otherwise the wrapped definition decides.
+     * @note $context is taken by reference, like the other validate()
+     *       methods, so that the wrapped definition works on the caller's
+     *       context object rather than on a copy of it.
+     * @param $string Value to validate
+     * @param $config Configuration object, passed on to the wrapped definition
+     * @param $context Context object, queried for 'CurrentToken'
+     * @return False when the current token is the denied element, otherwise
+     *         whatever the wrapped definition's validate() returns
+     */
+    function validate($string, $config, &$context) {
+        // NOTE(review): the second argument presumably tells the context
+        // not to complain when 'CurrentToken' is absent - confirm
+        $token = $context->get('CurrentToken', true);
+        if ($token && $token->name == $this->element) return false;
+        return $this->def->validate($string, $config, $context);
+    }
+}
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
- var $parser, $percentEncoder;
+ var $parser;
var $embedsResource;
/**
*/
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
- $this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
}
if ($config->get('URI', 'Disable')) return false;
- // initial operations
$uri = $this->parseCDATA($uri);
- $uri = $this->percentEncoder->normalize($uri);
// parse the URI
$uri = $this->parser->parse($uri);
$context->destroy('EmbeddedURI');
if (!$ok) return false;
- // munge scheme off if necessary (this must be last)
- if (!is_null($uri->scheme) && is_null($uri->host)) {
- if ($uri_def->defaultScheme == $uri->scheme) {
- $uri->scheme = null;
- }
- }
-
// back to string
$result = $uri->toString();
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
- // validate a domain name here, do filtering, etc etc etc
+ // A regular domain name.
- // We could use this, but it would break I18N domain names
- //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
- //if (!$match) return false;
+ // This breaks I18N domain names, but we don't have proper IRI support,
+ // so force users to insert Punycode. If there's complaining we'll
+ // try to fix things into an international friendly form.
+
+ // The productions describing this are:
+ $a = '[a-z]'; // alpha
+ $an = '[a-z0-9]'; // alphanum
+ $and = '[a-z0-9-]'; // alphanum | "-"
+ // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ $domainlabel = "$an($and*$an)?";
+ // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+ $toplabel = "$a($and*$an)?";
+ // hostname = *( domainlabel "." ) toplabel [ "." ]
+ $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
+ if (!$match) return false;
return $string;
}
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
+require_once 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
));
$this->info['width'] =
- $this->info['height'] =
+ $this->info['height'] =
+ new HTMLPurifier_AttrDef_CSS_DenyElementDecorator(
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(true),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
- ));
+ )), 'img');
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
/**
* HTML Purifier's version
*/
- var $version = '2.1.3';
+ var $version = '2.1.4';
/**
* Two-level associative array of configuration directives
/**
* Clears all expired (older version or revision) objects from cache
+ * @note Be careful implementing this method as flush. Flush must
+ * not interfere with other Definition types, and cleanup()
+ * should not be repeatedly called by userland code.
*/
function cleanup($config) {
trigger_error('Cannot call abstract method', E_USER_ERROR);
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
+require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
HTMLPurifier_ConfigSchema::define(
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
performance degradation). This directive has been available since 2.0.0.
');
-HTMLPurifier_ConfigSchema::defineAllowedValues(
- 'Cache', 'DefinitionImpl', array('Serializer')
-);
-
HTMLPurifier_ConfigSchema::defineAlias(
'Core', 'DefinitionCache',
'Cache', 'DefinitionImpl'
{
var $caches = array('Serializer' => array());
+ var $implementations = array();
var $decorators = array();
/**
return $instance;
}
+ /**
+ * Registers a new definition cache object under a short name.
+ * create() compares the short name against the value of the
+ * Cache.DefinitionImpl directive and, when the registered class exists,
+ * instantiates it; registering an already-used short name overwrites
+ * the earlier entry.
+ * @param $short Short name of cache object, for reference
+ * @param $long Full class name of cache object, for construction
+ */
+ function register($short, $long) {
+ $this->implementations[$short] = $long;
+ }
+
/**
* Factory method that creates a cache object based on configuration
* @param $name Name of definitions handled by cache
* @param $config Instance of HTMLPurifier_Config
*/
function &create($type, $config) {
- // only one implementation as for right now, $config will
- // be used to determine implementation
$method = $config->get('Cache', 'DefinitionImpl');
if ($method === null) {
$null = new HTMLPurifier_DefinitionCache_Null($type);
if (!empty($this->caches[$method][$type])) {
return $this->caches[$method][$type];
}
- $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
+ if (
+ isset($this->implementations[$method]) &&
+ class_exists($class = $this->implementations[$method])
+ ) {
+ $cache = new $class($type);
+ } else {
+ if ($method != 'Serializer') {
+ trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
+ }
+ $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
+ }
foreach ($this->decorators as $decorator) {
$new_cache = $decorator->decorate($cache);
// prevent infinite recursion in PHP 4
/**
* List of the names of required attributes this element has. Dynamically
- * populated.
+ * populated by HTMLPurifier_HTMLDefinition::getElement
* @public
*/
var $required_attr = array();
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
}
+ /**
+ * Error-handler that mutes errors, alternative to shut-up operator.
+ * The body is intentionally empty: the handler is installed with
+ * set_error_handler() around a call whose errors should be discarded,
+ * and removed again with restore_error_handler() right afterwards.
+ */
+ function muteErrorHandler() {}
+
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
+ // UTF-8 validity is checked since PHP 4.3.5
+ // This is an optimization: if the string is already valid UTF-8, no
+ // need to do iconv/php stuff. 99% of the time, this will be the case.
+ if (preg_match('/^.{1}/us', $str)) {
+ return strtr($str, $non_sgml_chars);
+ }
+
if ($iconv && !$force_php) {
// do the shortcut way
- $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
+ set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+ $str = iconv('UTF-8', 'UTF-8//IGNORE', $str);
+ restore_error_handler();
return strtr($str, $non_sgml_chars);
}
/**
* Adds a custom attribute to a pre-existing element
+ * @note This is strictly convenience, and does not have a corresponding
+ * method in HTMLPurifier_HTMLModule
* @param $element_name String element name to add attribute to
* @param $attr_name String name of attribute
* @param $def Attribute definition, can be string or object, see
*/
function addAttribute($element_name, $attr_name, $def) {
$module =& $this->getAnonymousModule();
- $element =& $module->addBlankElement($element_name);
+ if (!isset($module->info[$element_name])) {
+ $element =& $module->addBlankElement($element_name);
+ } else {
+ $element =& $module->info[$element_name];
+ }
$element->attr[$attr_name] = $def;
}
* @static
*/
function build($config, &$context) {
- $id_accumulator = new HTMLPurifier_IDAccumulator();
- $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
- return $id_accumulator;
+ $acc = new HTMLPurifier_IDAccumulator();
+ $acc->load($config->get('Attr', 'IDBlacklist'));
+ return $acc;
}
/**
*/
var $errorNames = array();
+ /**
+ * True if no message file was found for this language, so English
+ * is being used instead. Check this if you'd like to notify the
+ * user that they've used a non-supported language.
+ */
+ var $error = false;
+
/**
* Has the language object been loaded yet?
* @private
--- /dev/null
+<?php
+
+// private language message file for unit testing purposes
+// this language file has no class associated with it
+
+// NOTE(review): presumably the code of the language to fall back on; the
+// code that reads this variable is not visible here - confirm
+$fallback = 'en';
+
+// message key => message text; a single entry is defined
+$messages = array(
+ 'HTMLPurifier' => 'HTML Purifier XNone'
+);
+
* caching and fallbacks.
* @note Thanks to MediaWiki for the general logic, although this version
* has been entirely rewritten
+ * @todo Serialized cache for languages
*/
class HTMLPurifier_LanguageFactory
{
* Creates a language object, handles class fallbacks
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
+ * @param $code Code to override configuration with. Private parameter.
*/
- function create($config, &$context) {
+ function create($config, &$context, $code = false) {
// validate language code
- $code = $this->validator->validate(
- $config->get('Core', 'Language'), $config, $context
- );
+ if ($code === false) {
+ $code = $this->validator->validate(
+ $config->get('Core', 'Language'), $config, $context
+ );
+ } else {
+ $code = $this->validator->validate($code, $config, $context);
+ }
if ($code === false) $code = 'en'; // malformed code becomes English
$pcode = str_replace('-', '_', $code); // make valid PHP classname
static $depth = 0; // recursion protection
if ($code == 'en') {
- $class = 'HTMLPurifier_Language';
- $file = $this->dir . '/Language.php';
+ $lang = new HTMLPurifier_Language($config, $context);
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
- // PHP5/APC deps bug workaround can go here
- // you can bypass the conditional include by loading the
- // file yourself
- if (file_exists($file) && !class_exists($class)) {
- include_once $file;
- }
- }
-
- if (!class_exists($class)) {
- // go fallback
- $fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code);
- $depth++;
- $lang = HTMLPurifier_LanguageFactory::factory( $fallback );
- $depth--;
- } else {
- $lang = new $class($config, $context);
+ if (file_exists($file)) {
+ include $file;
+ $lang = new $class($config, $context);
+ } else {
+ // Go fallback
+ $raw_fallback = $this->getFallbackFor($code);
+ $fallback = $raw_fallback ? $raw_fallback : 'en';
+ $depth++;
+ $lang = $this->create($config, $context, $fallback);
+ if (!$raw_fallback) {
+ $lang->error = true;
+ }
+ $depth--;
+ }
}
$lang->code = $code;
$tokens[] = $this->factory->createText($node->data);
return;
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
- // undo DOM's special treatment of <script> tags
- $tokens[] = $this->factory->createText($this->parseData($node->data));
+ // undo libxml's special treatment of <script> and <style> tags
+ $last = end($tokens);
+ $data = $node->data;
+ // (note $node->tagname is already normalized)
+ if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
+ $new_data = trim($data);
+ if (substr($new_data, 0, 4) === '<!--') {
+ $data = substr($new_data, 4);
+ if (substr($data, -3) === '-->') {
+ $data = substr($data, 0, -3);
+ } else {
+ // Highly suspicious! Not sure what to do...
+ }
+ }
+ }
+ $tokens[] = $this->factory->createText($this->parseData($data));
return;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
+ // this code is only invoked for comments in script/style in versions
+ // of libxml pre-2.6.28 (regular comments, of course, are still
+ // handled regularly)
$tokens[] = $this->factory->createComment($node->data);
return;
} elseif (
// Check if it's a comment
if (
- substr($segment, 0, 3) === '!--'
+ strncmp('!--', $segment, 3) === 0
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
}
$strlen_segment = $position_comment_end - $cursor;
$segment = substr($html, $cursor, $strlen_segment);
- $token = new
- HTMLPurifier_Token_Comment(
- substr(
- $segment, 3, $strlen_segment - 3
- )
- );
+ $token = new HTMLPurifier_Token_Comment(substr($segment, 3));
if ($maintain_line_numbers) {
$token->line = $current_line;
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
/**
* Class that handles operations involving percent-encoding in URIs.
+ *
+ * @warning
+ * Be careful when reusing instances of PercentEncoder. The object
+ * you use for normalize() SHOULD NOT be used for encode(), or
+ * vice-versa.
*/
class HTMLPurifier_PercentEncoder
{
/**
- * Fix up percent-encoding by decoding unreserved characters and normalizing
+ * Reserved characters to preserve when using encode().
+ */
+ var $preserve = array();
+
+ /**
+ * Builds the table of byte values that are left as-is, keyed by
+ * ordinal. The unreserved characters (digits, letters, "-", ".", "_"
+ * and "~") are always included.
+ * @param $preserve Optional string of additional characters that should
+ *        be preserved while using encode(); false adds none
+ */
+ function HTMLPurifier_PercentEncoder($preserve = false) {
+     // unreserved characters as inclusive ordinal ranges, ought to const-ify
+     $unreserved = array(
+         array(48, 57),   // digits
+         array(65, 90),   // upper-case
+         array(97, 122),  // lower-case
+         array(45, 45),   // Dash -
+         array(46, 46),   // Period .
+         array(95, 95),   // Underscore _
+         array(126, 126)  // Tilde ~
+     );
+     foreach ($unreserved as $range) {
+         for ($ord = $range[0]; $ord <= $range[1]; $ord++) {
+             $this->preserve[$ord] = true;
+         }
+     }
+
+     // extra characters the caller does not want escaped
+     if ($preserve !== false) {
+         $total = strlen($preserve);
+         for ($pos = 0; $pos < $total; $pos++) {
+             $ord = ord($preserve[$pos]);
+             $this->preserve[$ord] = true;
+         }
+     }
+ }
+
+ /**
+ * Our replacement for urlencode: every byte is turned into an upper-case
+ * %XX escape unless it is listed in $preserve (the unreserved characters
+ * plus any extras given to the constructor) or is a percent sign.
+ * @note
+ *      Assumes that the string has already been normalized, making any
+ *      and all percent escape sequences valid. Percents will not be
+ *      re-escaped, regardless of their status in $preserve
+ * @param $string String to be encoded
+ * @return Encoded string.
+ */
+ function encode($string) {
+     $encoded = '';
+     $length = strlen($string);
+     for ($pos = 0; $pos < $length; $pos++) {
+         $char = $string[$pos];
+         // a percent sign is taken to start an existing escape: keep it
+         if ($char === '%') {
+             $encoded .= $char;
+             continue;
+         }
+         $ord = ord($char);
+         if (isset($this->preserve[$ord])) {
+             $encoded .= $char;
+         } else {
+             $encoded .= '%' . sprintf('%02X', $ord);
+         }
+     }
+     return $encoded;
+ }
+
+ /**
+ * Fix up percent-encoding by decoding unreserved characters and normalizing.
+ * @warning This function is affected by $preserve, even though the
+ * usual desired behavior is for this not to preserve those
+ * characters. Be careful when reusing instances of PercentEncoder!
* @param $string String to normalize
*/
function normalize($string) {
continue;
}
$int = hexdec($encoding);
- if (
- ($int >= 48 && $int <= 57) || // digits
- ($int >= 65 && $int <= 90) || // uppercase letters
- ($int >= 97 && $int <= 122) || // lowercase letters
- $int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
- ) {
+ if (isset($this->preserve[$int])) {
$ret .= chr($int) . $text;
continue;
}
// the parent
if (!isset($parent_info->child->elements[$token->name])) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
- // close the parent, then append the token
+ // close the parent, then re-loop to reprocess token
$result[] = new HTMLPurifier_Token_End($parent->name);
- $result[] = $token;
- $this->currentNesting[] = $token;
+ $this->inputIndex--;
continue;
}
require_once 'HTMLPurifier/URIFilter.php';
/**
- * HTML Purifier's internal representation of a URI
+ * HTML Purifier's internal representation of a URI.
+ * @note
+ * Internal data-structures are completely escaped. If the data needs
+ * to be used in a non-URI context (which is very unlikely), be sure
+ * to decode it first. The URI may not necessarily be well-formed until
+ * validate() is called.
*/
class HTMLPurifier_URI
{
}
/**
- * Generic validation method applicable for all schemes
+ * Generic validation method applicable for all schemes. May modify
+ * this URI in order to get it into a compliant form.
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return True if validation/filtering succeeds, false if failure
*/
function validate($config, &$context) {
+ // ABNF definitions from RFC 3986
+ $chars_sub_delims = '!$&\'()*+,;=';
+ $chars_gen_delims = ':/?#[]@';
+ $chars_pchar = $chars_sub_delims . ':@';
+
+ // validate scheme (MUST BE FIRST!)
+ if (!is_null($this->scheme) && is_null($this->host)) {
+ $def = $config->getDefinition('URI');
+ if ($def->defaultScheme === $this->scheme) {
+ $this->scheme = null;
+ }
+ }
+
// validate host
if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host();
if ($this->host === false) $this->host = null;
}
+ // validate username
+ if (!is_null($this->userinfo)) {
+ $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
+ $this->userinfo = $encoder->encode($this->userinfo);
+ }
+
// validate port
if (!is_null($this->port)) {
if ($this->port < 1 || $this->port > 65535) $this->port = null;
}
- // query and fragment are quite simple in terms of definition:
- // *( pchar / "/" / "?" ), so define their validation routines
- // when we start fixing percent encoding
-
- // path gets to be validated against a hodge-podge of rules depending
- // on the status of authority and scheme, but it's not that important,
- // esp. since it won't be applicable to everyone
+ // validate path
+ $path_parts = array();
+ $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
+ if (!is_null($this->host)) {
+ // path-abempty (hier and relative)
+ $this->path = $segments_encoder->encode($this->path);
+ } elseif ($this->path !== '' && $this->path[0] === '/') {
+ // path-absolute (hier and relative)
+ if (strlen($this->path) >= 2 && $this->path[1] === '/') {
+ // This shouldn't ever happen!
+ $this->path = '';
+ } else {
+ $this->path = $segments_encoder->encode($this->path);
+ }
+ } elseif (!is_null($this->scheme) && $this->path !== '') {
+ // path-rootless (hier)
+ // Short circuit evaluation means we don't need to check nz
+ $this->path = $segments_encoder->encode($this->path);
+ } elseif (is_null($this->scheme) && $this->path !== '') {
+ // path-noscheme (relative)
+ // (once again, not checking nz)
+ $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
+ $c = strpos($this->path, '/');
+ if ($c !== false) {
+ $this->path =
+ $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
+ $segments_encoder->encode(substr($this->path, $c));
+ } else {
+ $this->path = $segment_nc_encoder->encode($this->path);
+ }
+ } else {
+ // path-empty (hier and relative)
+ $this->path = ''; // just to be safe
+ }
return true;
/**
* Parses a URI into the components and fragment identifier as specified
- * by RFC 2396.
- * @todo Replace regexps with a native PHP parser
+ * by RFC 3986.
*/
class HTMLPurifier_URIParser
{
/**
- * Parses a URI
+ * Instance of HTMLPurifier_PercentEncoder to do normalization with.
+ */
+ var $percentEncoder;
+
+ /**
+ * Creates the parser together with the percent-encoder that parse()
+ * uses to normalize the raw URI; the encoder is constructed without
+ * any extra preserved characters.
+ */
+ function HTMLPurifier_URIParser() {
+ $this->percentEncoder = new HTMLPurifier_PercentEncoder();
+ }
+
+ /**
+ * Parses a URI.
* @param $uri string URI to parse
- * @return HTMLPurifier_URI representation of URI
+ * @return HTMLPurifier_URI representation of URI. This representation has
+ * not been validated yet and may not conform to RFC.
*/
function parse($uri) {
+
+ $uri = $this->percentEncoder->normalize($uri);
+
+ // Regexp is as per Appendix B.
+ // Note that ["<>] are an addition to the RFC's recommended
+ // characters, because they represent external delimiters.
$r_URI = '!'.
- '(([^:/?#<>\'"]+):)?'. // 2. Scheme
- '(//([^/?#<>\'"]*))?'. // 4. Authority
- '([^?#<>\'"]*)'. // 5. Path
- '(\?([^#<>\'"]*))?'. // 7. Query
- '(#([^<>\'"]*))?'. // 8. Fragment
+ '(([^:/?#"<>]+):)?'. // 2. Scheme
+ '(//([^/?#"<>]*))?'. // 4. Authority
+ '([^?#"<>]*)'. // 5. Path
+ '(\?([^#"<>]*))?'. // 7. Query
+ '(#([^"<>]*))?'. // 8. Fragment
'!';
$matches = array();
// further parse authority
if ($authority !== null) {
- // ridiculously inefficient: it's a stacked regex!
- $HEXDIG = '[A-Fa-f0-9]';
- $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
- $sub_delims = '!$&\'()'; // needs []
- $pct_encoded = "%$HEXDIG$HEXDIG";
- $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
- $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+ $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$matches = array();
preg_match($r_authority, $authority, $matches);
$userinfo = !empty($matches[1]) ? $matches[2] : null;
-Description of HTML Purifier v2.1.3 Lite library import into Moodle
+Description of HTML Purifier v2.1.4 Lite library import into Moodle
Changes:
* HMLTModule/Text.php - added <nolink>, <tex>, <lang> and <algebra> tags