MDL-10928:

author thepurpleblob <thepurpleblob>

Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)

committer thepurpleblob <thepurpleblob>

Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)
author thepurpleblob <thepurpleblob>
Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)
committer thepurpleblob <thepurpleblob>
Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)
diff --git a/lib/markdown.php b/lib/markdown.php

index 0c2cfed85e5686857839f2e21169c78a2f293a60..6d43f8bcac140e52fbeac034f311b1e868da0e0d 100755 (executable)
--- a/lib/markdown.php
+++ b/lib/markdown.php
@@ -12,8 +12,8 @@
  #
  
  
-define( 'MARKDOWN_VERSION',       "1.0.1f" ); # Wed 7 Feb 2007
-define( 'MARKDOWNEXTRA_VERSION',  "1.1.2" );  # Wed 7 Feb 2007
+define( 'MARKDOWN_VERSION',       "1.0.1h" ); # Fri 3 Aug 2007
+define( 'MARKDOWNEXTRA_VERSION',  "1.1.4" );  # Fri 3 Aug 2007
  
  
  #
@@ -71,7 +71,7 @@ function Markdown($text) {
  Plugin Name: Markdown Extra
  Plugin URI: http://www.michelf.com/projects/php-markdown/
  Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
-Version: 1.1.2
+Version: 1.1.4
  Author: Michel Fortin
  Author URI: http://www.michelf.com/
  */
@@ -85,9 +85,11 @@ if (isset($wp_version)) {
         # - Run Markdown on excerpt, then remove all tags.
         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
         if (MARKDOWN_WP_POSTS) {
-               remove_filter('the_content',  'wpautop');
-               remove_filter('the_excerpt',  'wpautop');
+               remove_filter('the_content',     'wpautop');
+        remove_filter('the_content_rss', 'wpautop');
+               remove_filter('the_excerpt',     'wpautop');
                 add_filter('the_content',     'Markdown', 6);
+        add_filter('the_content_rss', 'Markdown', 6);
                 add_filter('get_the_excerpt', 'Markdown', 6);
                 add_filter('get_the_excerpt', 'trim', 7);
                 add_filter('the_excerpt',     'mdwp_add_p');
@@ -105,7 +107,7 @@ if (isset($wp_version)) {
         # - Scramble important tags before passing them to the kses filter.
         # - Run Markdown on excerpt then remove paragraph tags.
         if (MARKDOWN_WP_COMMENTS) {
-               remove_filter('comment_text', 'wpautop');
+               remove_filter('comment_text', 'wpautop', 30);
                 remove_filter('comment_text', 'make_clickable');
                 add_filter('pre_comment_content', 'Markdown', 6);
                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
@@ -114,14 +116,12 @@ if (isset($wp_version)) {
                 add_filter('get_comment_excerpt', 'Markdown', 6);
                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
         
-               global $markdown_hidden_tags;
-               $markdown_hidden_tags = array(
-                       '<p>'   => md5('<p>'),          '</p>'  => md5('</p>'),
-                       '<pre>' => md5('<pre>'),        '</pre>'=> md5('</pre>'),
-                       '<ol>'  => md5('<ol>'),         '</ol>' => md5('</ol>'),
-                       '<ul>'  => md5('<ul>'),         '</ul>' => md5('</ul>'),
-                       '<li>'  => md5('<li>'),         '</li>' => md5('</li>'),
-                       );
+               global $mdwp_hidden_tags, $mdwp_placeholders;
+               $mdwp_hidden_tags = explode(' ',
+                       '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
+               $mdwp_placeholders = explode(' ', str_rot13(
+                       'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
+                       'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
         }
         
         function mdwp_add_p($text) {
@@ -135,14 +135,12 @@ if (isset($wp_version)) {
         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
  
         function mdwp_hide_tags($text) {
-               global $markdown_hidden_tags;
-               return str_replace(array_keys($markdown_hidden_tags), 
-                                                       array_values($markdown_hidden_tags), $text);
+               global $mdwp_hidden_tags, $mdwp_placeholders;
+               return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
         }
         function mdwp_show_tags($text) {
-               global $markdown_hidden_tags;
-               return str_replace(array_values($markdown_hidden_tags), 
-                                                       array_keys($markdown_hidden_tags), $text);
+               global $mdwp_hidden_tags, $mdwp_placeholders;
+               return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
         }
  }
  
@@ -205,15 +203,22 @@ class Markdown_Parser {
         # Needed to insert a maximum bracked depth while converting to PHP.
         var $nested_brackets_depth = 6;
         var $nested_brackets;
+       
+       var $nested_url_parenthesis_depth = 4;
+       var $nested_url_parenthesis;
  
         # Table of hash values for escaped characters:
         var $escape_chars = '\`*_{}[]()>#+-.!';
-       var $escape_table = array();
+//     var $escape_table = array();
         var $backslash_escape_table = array();
  
         # Change to ">" for HTML output.
         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
         var $tab_width = MARKDOWN_TAB_WIDTH;
+       
+       # Change to `true` to disallow markup or entities.
+       var $no_markup = false;
+       var $no_entities = false;
  
  
         function Markdown_Parser() {
@@ -225,12 +230,16 @@ class Markdown_Parser {
                 $this->nested_brackets = 
                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
                         str_repeat('\])*', $this->nested_brackets_depth);
+       
+               $this->nested_url_parenthesis = 
+                       str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
+                       str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
                 
                 # Create an identical table but for escaped characters.
                 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
-                       $hash = md5($char);
-                       $this->escape_table[$char] = $hash;
-                       $this->backslash_escape_table["\\$char"] = $hash;
+                       $entity = "&#". ord($char). ";";
+//                     $this->escape_table[$char] = $entity;
+                       $this->backslash_escape_table["\\$char"] = $entity;
                 }
                 
                 # Sort document, block, and span gamut in ascendent priority order.
@@ -245,6 +254,9 @@ class Markdown_Parser {
         var $titles = array();
         var $html_blocks = array();
         var $html_hashes = array(); # Contains both blocks and span hashes.
+       
+       # Status flag to avoid invalid nesting.
+       var $in_anchor = false;
  
  
         function transform($text) {
@@ -279,8 +291,8 @@ class Markdown_Parser {
                 # Strip any lines consisting only of spaces and tabs.
                 # This makes subsequent regexen easier to write, because we can
                 # match consecutive blank lines with /\n+/ instead of something
-               # contorted like /[ \t]*\n+/ .
-               $text = preg_replace('/^[ \t]+$/m', '', $text);
+               # contorted like /[ ]*\n+/ .
+               $text = preg_replace('/^[ ]+$/m', '', $text);
  
                 # Run document gamut methods.
                 foreach ($this->document_gamut as $method => $priority) {
@@ -295,7 +307,6 @@ class Markdown_Parser {
                 "stripLinkDefinitions" => 20,
                 
                 "runBasicBlockGamut"   => 30,
-               "unescapeSpecialChars" => 90,
                 );
  
  
@@ -309,19 +320,19 @@ class Markdown_Parser {
                 # Link defs are in the form: ^[id]: url "optional title"
                 $text = preg_replace_callback('{
                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
-                                                         [ \t]*
+                                                         [ ]*
                                                           \n?                           # maybe *one* newline
-                                                         [ \t]*
+                                                         [ ]*
                                                         <?(\S+?)>?                      # url = $2
-                                                         [ \t]*
+                                                         [ ]*
                                                           \n?                           # maybe one newline
-                                                         [ \t]*
+                                                         [ ]*
                                                         (?:
                                                                 (?<=\s)                 # lookbehind for whitespace
                                                                 ["(]
                                                                 (.*?)                   # title = $3
                                                                 [")]
-                                                               [ \t]*
+                                                               [ ]*
                                                         )?      # title is optional
                                                         (?:\n+|\Z)
                         }xm',
@@ -339,6 +350,8 @@ class Markdown_Parser {
  
  
         function hashHTMLBlocks($text) {
+               if ($this->no_markup)  return $text;
+       
                 $less_than_tab = $this->tab_width - 1;
  
                 # Hashify HTML blocks:
@@ -407,10 +420,10 @@ class Markdown_Parser {
                                                 '.$attr.'>\n            # attributes followed by > and \n
                                                 '.$content.'            # content, support nesting
                                                 </\2>                           # the matching end tag
-                                               [ \t]*                          # trailing spaces/tabs
+                                               [ ]*                            # trailing spaces/tabs
                                                 (?=\n+|\Z)      # followed by a newline or end of document
                                         )
-                       }xm',
+                       }xmi',
                         array(&$this, '_hashHTMLBlocks_callback'),
                         $text);
  
@@ -424,10 +437,10 @@ class Markdown_Parser {
                                                 '.$attr.'>                      # attributes followed by >
                                                 '.$content.'            # content, support nesting
                                                 </\2>                           # the matching end tag
-                                               [ \t]*                          # trailing spaces/tabs
+                                               [ ]*                            # trailing spaces/tabs
                                                 (?=\n+|\Z)      # followed by a newline or end of document
                                         )
-                       }xm',
+                       }xmi',
                         array(&$this, '_hashHTMLBlocks_callback'),
                         $text);
  
@@ -445,10 +458,10 @@ class Markdown_Parser {
                                                 \b                                      # word break
                                                 ([^<>])*?                       # 
                                                 /?>                                     # the matching end tag
-                                               [ \t]*
+                                               [ ]*
                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                         )
-                       }x',
+                       }xi',
                         array(&$this, '_hashHTMLBlocks_callback'),
                         $text);
  
@@ -464,7 +477,7 @@ class Markdown_Parser {
                                         (?s:
                                                 <!-- .*? -->
                                         )
-                                       [ \t]*
+                                       [ ]*
                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                 )
                         }x',
@@ -485,7 +498,7 @@ class Markdown_Parser {
                                                 .*?
                                                 \2>
                                         )
-                                       [ \t]*
+                                       [ ]*
                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                 )
                         }x',
@@ -512,25 +525,28 @@ class Markdown_Parser {
                 $text = $this->unhash($text);
                 
                 # Then hash the block.
-               $key = md5($text);
+               $key = "B\x1A". md5($text);
                 $this->html_hashes[$key] = $text;
                 $this->html_blocks[$key] = $text;
                 return $key; # String that will replace the tag.
         }
  
  
-       function hashSpan($text) {
+       function hashSpan($text, $word_separator = false) {
         #
         # Called whenever a tag must be hashed when a function insert a span-level 
         # element in $text, it pass through this function and is automaticaly 
-       # escaped, blocking invalid nested overlap.
+       # escaped, blocking invalid nested overlap. If optional argument 
+       # $word_separator is true, surround the hash value by spaces.
         #
                 # Swap back any tag hash found in $text so we do not have to `unhash`
                 # multiple times at the end.
                 $text = $this->unhash($text);
                 
                 # Then hash the span.
-               $key = md5($text);
+               $key = "S\x1A". md5($text);
+               if ($word_separator) $key = ":$key:";
+               
                 $this->html_hashes[$key] = $text;
                 return $key; # String that will replace the span tag.
         }
@@ -583,9 +599,9 @@ class Markdown_Parser {
         function doHorizontalRules($text) {
                 # Do Horizontal Rules:
                 return preg_replace(
-                       array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
-                                 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
-                                 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
+                       array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx',
+                                 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx',
+                                 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'),
                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
                         $text);
         }
@@ -642,15 +658,18 @@ class Markdown_Parser {
         # value; this is likely overkill, but it should prevent us from colliding
         # with the escape values by accident.
         #
+               if ($this->no_markup)  return $text;
+       
                 $tokens = $this->tokenizeHTML($text);
                 $text = '';   # rebuild $text from the tokens
  
                 foreach ($tokens as $cur_token) {
                         if ($cur_token[0] == 'tag') {
-                               $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
-                               $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
-                               $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
-                               $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
+//                             $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
+//                             $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]);
+//                             $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
+//                             $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
+                               $cur_token[1] = $this->hashSpan($cur_token[1]);
                         }
                         $text .= $cur_token[1];
                 }
@@ -662,6 +681,9 @@ class Markdown_Parser {
         #
         # Turn Markdown link shortcuts into XHTML <a> tags.
         #
+               if ($this->in_anchor) return $text;
+               $this->in_anchor = true;
+               
                 #
                 # First, handle reference-style links: [link text] [id]
                 #
@@ -690,14 +712,18 @@ class Markdown_Parser {
                                 ('.$this->nested_brackets.')    # link text = $2
                           \]
                           \(                    # literal paren
-                               [ \t]*
-                               <?(.*?)>?       # href = $3
-                               [ \t]*
-                               (                       # $4
-                                 ([\'"])       # quote char = $5
-                                 (.*?)         # Title = $6
-                                 \5            # matching quote
-                                 [ \t]*        # ignore any spaces/tabs between closing quote and )
+                               [ ]*
+                               (?:
+                                       <(\S*)> # href = $3
+                               |
+                                       ('.$this->nested_url_parenthesis.')     # href = $4
+                               )
+                               [ ]*
+                               (                       # $5
+                                 ([\'"])       # quote char = $6
+                                 (.*?)         # Title = $7
+                                 \6            # matching quote
+                                 [ ]*  # ignore any spaces/tabs between closing quote and )
                                 )?                      # title is optional
                           \)
                         )
@@ -718,6 +744,7 @@ class Markdown_Parser {
  //                     }xs',
  //                     array(&$this, '_doAnchors_reference_callback'), $text);
  
+               $this->in_anchor = false;
                 return $text;
         }
         function _doAnchors_reference_callback($matches) {
@@ -757,9 +784,9 @@ class Markdown_Parser {
         function _doAnchors_inline_callback($matches) {
                 $whole_match    =  $matches[1];
                 $link_text              =  $this->runSpanGamut($matches[2]);
-               $url                    =  $matches[3];
-               $title                  =& $matches[6];
-               
+               $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
+               $title                  =& $matches[7];
+
                 $url = $this->encodeAmpsAndAngles($url);
  
                 $result = "<a href=\"$url\"";
@@ -811,14 +838,18 @@ class Markdown_Parser {
                           \]
                           \s?                   # One optional whitespace character
                           \(                    # literal paren
-                               [ \t]*
-                               <?(\S+?)>?      # src url = $3
-                               [ \t]*
-                               (                       # $4
-                                 ([\'"])       # quote char = $5
-                                 (.*?)         # title = $6
-                                 \5            # matching quote
-                                 [ \t]*
+                               [ ]*
+                               (?:
+                                       <(\S*)> # src url = $3
+                               |
+                                       ('.$this->nested_url_parenthesis.')     # src url = $4
+                               )
+                               [ ]*
+                               (                       # $5
+                                 ([\'"])       # quote char = $6
+                                 (.*?)         # title = $7
+                                 \6            # matching quote
+                                 [ ]*
                                 )?                      # title is optional
                           \)
                         )
@@ -857,8 +888,8 @@ class Markdown_Parser {
         function _doImages_inline_callback($matches) {
                 $whole_match    = $matches[1];
                 $alt_text               = $matches[2];
-               $url                    = $matches[3];
-               $title                  =& $matches[6];
+               $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
+               $title                  =& $matches[7];
  
                 $alt_text = str_replace('"', '&quot;', $alt_text);
                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
@@ -880,9 +911,9 @@ class Markdown_Parser {
                 #         Header 2
                 #         --------
                 #
-               $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
+               $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx',
                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
-               $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
+               $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx',
                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
  
                 # atx-style headers:
@@ -894,9 +925,9 @@ class Markdown_Parser {
                 #
                 $text = preg_replace_callback('{
                                 ^(\#{1,6})      # $1 = string of #\'s
-                               [ \t]*
+                               [ ]*
                                 (.+?)           # $2 = Header text
-                               [ \t]*
+                               [ ]*
                                 \#*                     # optional closing #\'s (not counted)
                                 \n+
                         }xm',
@@ -939,7 +970,7 @@ class Markdown_Parser {
                                   (                                                             # $2
                                         [ ]{0,'.$less_than_tab.'}
                                         ('.$marker.')                           # $3 = first list item marker
-                                       [ \t]+
+                                       [ ]+
                                   )
                                   (?s:.+?)
                                   (                                                             # $4
@@ -948,8 +979,8 @@ class Markdown_Parser {
                                           \n{2,}
                                           (?=\S)
                                           (?!                                           # Negative lookahead for another list item marker
-                                               [ \t]*
-                                               '.$marker.'[ \t]+
+                                               [ ]*
+                                               '.$marker.'[ ]+
                                           )
                                   )
                                 )
@@ -1029,11 +1060,11 @@ class Markdown_Parser {
  
                 $list_str = preg_replace_callback('{
                         (\n)?                                                   # leading line = $1
-                       (^[ \t]*)                                               # leading whitespace = $2
-                       ('.$marker_any.') [ \t]+                # list marker = $3
+                       (^[ ]*)                                         # leading whitespace = $2
+                       ('.$marker_any.') [ ]+          # list marker = $3
                         ((?s:.+?))                                              # list item text   = $4
                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
-                       (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
+                       (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
                         }xm',
                         array(&$this, '_processListItems_callback'), $list_str);
  
@@ -1084,7 +1115,7 @@ class Markdown_Parser {
                 $codeblock = $matches[1];
  
                 $codeblock = $this->encodeCode($this->outdent($codeblock));
-       //      $codeblock = $this->detab($codeblock);
+//             $codeblock = $this->detab($codeblock);
                 # trim leading newlines and trailing whitespace
                 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
  
@@ -1133,8 +1164,8 @@ class Markdown_Parser {
         }
         function _doCodeSpans_callback($matches) {
                 $c = $matches[2];
-               $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
-               $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
+               $c = preg_replace('/^[ ]*/', '', $c); # leading whitespace
+               $c = preg_replace('/[ ]*$/', '', $c); # trailing whitespace
                 $c = $this->encodeCode($c);
                 return $this->hashSpan("<code>$c</code>");
         }
@@ -1173,13 +1204,13 @@ class Markdown_Parser {
                                 (?=\S)                                  # Not followed by whitespace 
                                 (?!\1\1)                                #   or two others marker chars.
                                 (                                               # $2: Content
-                                       (?:
+                                       (?>
                                                 [^*_]+?                 # Anthing not em markers.
                                         |
                                                                                 # Balence any regular emphasis inside.
                                                 \1 (?=\S) .+? (?<=\S) \1
                                         |
-                                               (?! \1 ) .              # Allow unbalenced * and _.
+                                               .                               # Allow unbalenced * and _.
                                         )+?
                                 )
                                 (?<=\S) \1\1                    # End mark not preceded by whitespace.
@@ -1187,7 +1218,7 @@ class Markdown_Parser {
                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
                 # Then <em>:
                 $text = preg_replace_callback(
-                       '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
+                       '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
                         array(&$this, '_doItalicAndBold_em_callback'), $text);
  
                 return $text;
@@ -1208,7 +1239,7 @@ class Markdown_Parser {
                 $text = preg_replace_callback('/
                           (                                                             # Wrap whole match in $1
                                 (
-                                 ^[ \t]*>[ \t]?                        # ">" at the start of a line
+                                 ^[ ]*>[ ]?                    # ">" at the start of a line
                                         .+\n                                    # rest of the first line
                                   (.+\n)*                                       # subsequent consecutive lines
                                   \n*                                           # blanks
@@ -1222,7 +1253,7 @@ class Markdown_Parser {
         function _doBlockQuotes_callback($matches) {
                 $bq = $matches[1];
                 # trim one level of quoting - trim whitespace-only lines
-               $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
+               $bq = preg_replace(array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);
                 $bq = $this->runBlockGamut($bq);                # recurse
  
                 $bq = preg_replace('/^/m', "  ", $bq);
@@ -1256,7 +1287,7 @@ class Markdown_Parser {
                 foreach ($grafs as $key => $value) {
                         if (!isset( $this->html_blocks[$value] )) {
                                 $value = $this->runSpanGamut($value);
-                               $value = preg_replace('/^([ \t]*)/', "<p>", $value);
+                               $value = preg_replace('/^([ ]*)/', "<p>", $value);
                                 $value .= "</p>";
                                 $grafs[$key] = $this->unhash($value);
                         }
@@ -1315,6 +1346,11 @@ class Markdown_Parser {
  
         function encodeAmpsAndAngles($text) {
         # Smart processing for ampersands and angle brackets that need to be encoded.
+               if ($this->no_entities) {
+                       $text = str_replace('&', '&amp;', $text);
+                       $text = str_replace('<', '&lt;', $text);
+                       return $text;
+               }
  
                 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
                 #   http://bumppo.net/projects/amputator/
@@ -1334,9 +1370,11 @@ class Markdown_Parser {
         #       Returns:    The string, with after processing the following backslash
         #                               escape sequences.
         #
-               # Must process escaped backslashes first.
-               return str_replace(array_keys($this->backslash_escape_table),
-                                                  array_values($this->backslash_escape_table), $text);
+               # Must process escaped backslashes first (should be first in list).
+               foreach ($this->backslash_escape_table as $search => $replacement) {
+                       $text = str_replace($search, $this->hashSpan($replacement), $text);
+               }
+               return $text;
         }
  
  
@@ -1366,7 +1404,6 @@ class Markdown_Parser {
         }
         function _doAutoLinks_email_callback($matches) {
                 $address = $matches[1];
-               $address = $this->unescapeSpecialChars($address);
                 $link = $this->encodeEmailAddress($address);
                 return $this->hashSpan($link);
         }
@@ -1413,15 +1450,6 @@ class Markdown_Parser {
         }
  
  
-       function unescapeSpecialChars($text) {
-       #
-       # Swap back in all the special characters we've hidden.
-       #
-               return str_replace(array_values($this->escape_table), 
-                                                  array_keys($this->escape_table), $text);
-       }
-
-
         function tokenizeHTML($str) {
         #
         #   Parameter:  String containing HTML + Markdown markup.
@@ -1512,7 +1540,7 @@ class Markdown_Parser {
                 # tab characters. Then we reconstruct every line by adding the 
                 # appropriate number of space between each blocks.
                 
-               $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
+               $strlen = $this->utf8_strlen; # strlen function for UTF-8.
                 $lines = explode("\n", $text);
                 $text = "";
                 
@@ -1535,18 +1563,14 @@ class Markdown_Parser {
         function _initDetab() {
         #
         # Check for the availability of the function in the `utf8_strlen` property
-       # (probably `mb_strlen`). If the function is not available, create a 
+       # (initially `mb_strlen`). If the function is not available, create a 
         # function that will loosely count the number of UTF-8 characters with a
         # regular expression.
         #
                 if (function_exists($this->utf8_strlen)) return;
-               $this->utf8_strlen = 'Markdown_UTF8_strlen';
-               
-               if (function_exists($this->utf8_strlen)) return;
-               function Markdown_UTF8_strlen($text) {
-                       return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', 
-                               $text, $m);
-               }
+               $this->utf8_strlen = create_function('$text', 'return preg_match_all(
+                       "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
+                       $text, $m);');
         }
  
  
@@ -1599,8 +1623,8 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                         "doDefLists"         => 45,
                         );
                 $this->span_gamut += array(
-                       "doFootnotes"        =>  4,
-                       "doAbbreviations"    =>  5,
+                       "doFootnotes"        => 5,
+                       "doAbbreviations"    => 70,
                         );
                 
                 parent::Markdown_Parser();
@@ -1614,6 +1638,9 @@ class MarkdownExtra_Parser extends Markdown_Parser {
         var $abbr_matches = array();
         var $html_cleans = array();
         
+       # Status flag to avoid invalid nesting.
+       var $in_footnote = false;
+       
         
         function transform($text) {
         #
@@ -1727,7 +1754,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                                         (?!\s)'.$enclosing_tag.'
                                                 )
                                                 \s*                             # Whitespace.
-                                               (?:
+                                               (?>
                                                         ".*?"           |       # Double quotes (can contain `>`)
                                                         \'.*?\'         |       # Single quotes (can contain `>`)
                                                         .+?                             # Anything but quotes and `>`.
@@ -1763,8 +1790,9 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                         # If in Markdown span mode, add a empty-string span-level hash 
                         # after each newline to prevent triggering any block element.
                         if ($span) {
-                               $newline = $this->hashSpan("") . "\n";
-                               $parts[0] = str_replace("\n", $newline, $parts[0]);
+                               $void = $this->hashSpan("", true) ;
+                               $newline = $this->hashSpan("", true) . "\n";
+                               $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
                         }
                         
                         $parsed .= $parts[0]; # Text before current tag.
@@ -1879,9 +1907,14 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                 \s*                     # Eat whitespace before the `markdown` attribute
                                 markdown
                                 \s*=\s*
-                               (["\'])         # $1: quote delimiter           
-                               (.*?)           # $2: attribute value
-                               \1                      # matching delimiter    
+                               (?:
+                                       (["\'])         # $1: quote delimiter           
+                                       (.*?)           # $2: attribute value
+                                       \1                      # matching delimiter    
+                               |
+                                       ([^\s>]*)       # $3: unquoted attribute value
+                               )
+                               ()                              # $4: make $3 always defined (avoid warnings)
                         }xs';
                 
                 # Regex to match any tag.
@@ -1890,7 +1923,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                         </?                                     # Any opening or closing tag.
                                                 [\w:$]+                 # Tag name.
                                                 \s*                             # Whitespace.
-                                               (?:
+                                               (?>
                                                         ".*?"           |       # Double quotes (can contain `>`)
                                                         \'.*?\'         |       # Single quotes (can contain `>`)
                                                         .+?                             # Anything but quotes and `>`.
@@ -1967,14 +2000,14 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                 # Check for `markdown="1"` attribute and handle it.
                                 #
                                 if ($md_attr && 
-                                       preg_match($markdown_attr_match, $tag, $attr_matches) &&
-                                       preg_match('/^1|block|span$/', $attr_matches[2]))
+                                       preg_match($markdown_attr_match, $tag, $attr_m) &&
+                                       preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
                                 {
                                         # Remove `markdown` attribute from opening tag.
                                         $tag = preg_replace($markdown_attr_match, '', $tag);
                                         
                                         # Check if text inside this tag must be parsed in span mode.
-                                       $this->mode = $attr_matches[2];
+                                       $this->mode = $attr_m[2] . $attr_m[3];
                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                                                 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);
                                         
@@ -2033,7 +2066,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 $text = $this->unhash($text);
                 
                 # Then hash the tag.
-               $key = md5($text);
+               $key = "C\x1A". md5($text);
                 $this->html_cleans[$key] = $text;
                 $this->html_hashes[$key] = $text;
                 return $key; # String that will replace the clean tag.
@@ -2052,10 +2085,10 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 #         --------
                 #
                 $text = preg_replace_callback(
-                       '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }mx',
+                       '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n=+[ ]*\n+ }mx',
                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
                 $text = preg_replace_callback(
-                       '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }mx',
+                       '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n-+[ ]*\n+ }mx',
                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
  
                 # atx-style headers:
@@ -2067,12 +2100,12 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 #
                 $text = preg_replace_callback('{
                                 ^(\#{1,6})      # $1 = string of #\'s
-                               [ \t]*
+                               [ ]*
                                 (.+?)           # $2 = Header text
-                               [ \t]*
+                               [ ]*
                                 \#*                     # optional closing #\'s (not counted)
                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
-                               [ \t]*
+                               [ ]*
                                 \n+
                         }xm',
                         array(&$this, '_doHeaders_callback_atx'), $text);
@@ -2368,14 +2401,14 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                 (?=\S)                                  # Not followed by whitespace 
                                 (?!__)                                  #   or two others marker chars.
                                 (                                               # $2: Content
-                                       (?:
+                                       (?>
                                                 [^_]+?                  # Anthing not em markers.
                                         |
                                                                                 # Balence any regular _ emphasis inside.
                                                 (?<![a-zA-Z0-9]) _ (?=\S) (.+?) 
                                                 (?<=\S) _ (?![a-zA-Z0-9])
                                         |
-                                               ___+
+                                               _+                              # Allow unbalenced as last resort.
                                         )+?
                                 )
                                 (?<=\S) __                              # End mark not preceded by whitespace.
@@ -2387,11 +2420,13 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                 (?=\S)                                  # Not followed by whitespace 
                                 (?!\1)                                  #   or two others marker chars.
                                 (                                               # $2: Content
-                                       (?:
+                                       (?>
                                                 [^*]+?                  # Anthing not em markers.
                                         |
                                                                                 # Balence any regular * emphasis inside.
                                                 \* (?=\S) (.+?) (?<=\S) \*
+                                       |
+                                               \*                              # Allow unbalenced as last resort.
                                         )+?
                                 )
                                 (?<=\S) \*\*                    # End mark not preceded by whitespace.
@@ -2401,7 +2436,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 # Then <em>:
                 $text = preg_replace_callback(array(
                         '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
-                       '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
+                       '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
                         ),
                         array(&$this, '_doItalicAndBold_em_callback'), $text);
  
@@ -2428,7 +2463,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                         # Check if this should be enclosed in a paragraph.
                         # Clean tag hashes & block tag hashes are left alone.
                         $clean_key = $value;
-                       $block_key = substr($value, 0, 32);
+                       $block_key = substr($value, 0, 34);
                         
                         $is_p = (!isset($this->html_blocks[$block_key]) && 
                                          !isset($this->html_cleans[$clean_key]));
@@ -2461,7 +2496,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 # Link defs are in the form: [^id]: url "optional title"
                 $text = preg_replace_callback('{
                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
-                         [ \t]*
+                         [ ]*
                           \n?                                   # maybe *one* newline
                         (                                               # text = $2 (no blank lines allowed)
                                 (?:                                     
@@ -2479,7 +2514,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                 return $text;
         }
         function _stripFootnotes_callback($matches) {
-               $note_id = $matches[1];
+               $note_id = $this->fn_id_prefix . $matches[1];
                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
                 return ''; # String that will replace the block
         }
@@ -2490,7 +2525,9 @@ class MarkdownExtra_Parser extends Markdown_Parser {
         # Replace footnote references in $text [^id] with a special text-token 
         # which will be can be
         #
-               $text = preg_replace('{\[\^(.+?)\]}', "a\0fn:\\1\0z", $text);
+               if (!$this->in_footnote && !$this->in_anchor) {
+                       $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
+               }
                 return $text;
         }
  
@@ -2499,7 +2536,8 @@ class MarkdownExtra_Parser extends Markdown_Parser {
         #
         # Append footnote list to text.
         #
-               $text = preg_replace_callback('{a\0fn:(.*?)\0z}', 
+       
+               $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
                         array(&$this, '_appendFootnotes_callback'), $text);
         
                 if (!empty($this->footnotes_ordered)) {
@@ -2523,6 +2561,8 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                         }
                         $num = 0;
                         
+                       $this->in_footnote = true;
+                       
                         foreach ($this->footnotes_ordered as $note_id => $footnote) {
                                 $footnote .= "\n"; # Need to append newline before parsing.
                                 $footnote = $this->runBlockGamut("$footnote\n");
@@ -2542,10 +2582,10 @@ class MarkdownExtra_Parser extends Markdown_Parser {
                                 $text .= "</li>\n\n";
                         }
                         
+                       $this->in_footnote = false;
+                       
                         $text .= "</ol>\n";
                         $text .= "</div>";
-                       
-                       $text = preg_replace('{a\{fn:(.*?)\}z}', '[^\\1]', $text);
                 }
                 return $text;
         }
@@ -2589,8 +2629,7 @@ class MarkdownExtra_Parser extends Markdown_Parser {
         
         function stripAbbreviations($text) {
         #
-       # Strips abbreviations from text, stores the URLs and titles in
-       # hash references.
+       # Strips abbreviations from text, stores titles in hash references.
         #
                 $less_than_tab = $this->tab_width - 1;
  
@@ -2614,12 +2653,16 @@ class MarkdownExtra_Parser extends Markdown_Parser {
         
         function doAbbreviations($text) {
         #
-       # Replace footnote references in $text [^id] with a link to the footnote.
+       # Find defined abbreviations in text and wrap them in <abbr> elements.
         #
                 if ($this->abbr_matches) {
-                       $regex = '{(?<!\w)(?:'. implode('|', $this->abbr_matches) .')(?!\w)}';
-       
-                       $text = preg_replace_callback($regex, 
+                       // cannot use the /x modifier because abbr_matches may 
+                       // contain spaces:
+                       $text = preg_replace_callback('{'.
+                               '(?<![\w\x1A])'.
+                               '(?:'. implode('|', $this->abbr_matches) .')'.
+                               '(?![\w\x1A])'.
+                               '}', 
                                 array(&$this, '_doAbbreviations_callback'), $text);
                 }
                 return $text;
@@ -2686,6 +2729,10 @@ Version History
  
  See Readme file for details.
  
+Extra 1.1.4 (3 Aug 2007):
+
+Extra 1.1.3 (3 Jul 2007):
+
  Extra 1.1.2 (7 Feb 2007)
  
  Extra 1.1.1 (28 Dec 2006)
author	thepurpleblob <thepurpleblob>
	Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)
committer	thepurpleblob <thepurpleblob>
	Tue, 21 Aug 2007 09:54:49 +0000 (09:54 +0000)