]> git.mjollnir.org Git - moodle.git/commitdiff
weblib MDL-21168 improved convert_urls_into_links
authorAndrew Davis <andrew@affinitysoftware.net>
Thu, 24 Dec 2009 06:44:51 +0000 (06:44 +0000)
committerAndrew Davis <andrew@affinitysoftware.net>
Thu, 24 Dec 2009 06:44:51 +0000 (06:44 +0000)
lib/simpletest/testweblib.php
lib/weblib.php

index 64dbd435e8797ab11ae744636bfc33413414acb2..1f26c1e655422e4e3e8b34ab288e50509dbe9103 100644 (file)
@@ -166,49 +166,88 @@ END;
     }
 
     function test_convert_urls_into_links() {
-        $texts = array (
+$texts = array (
+                     //just a url
+                     'http://moodle.org - URL' => '<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> - URL',
+                     'www.moodle.org - URL' => '<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> - URL',
+                     //url with params
                      'URL: http://moodle.org/s/i=1&j=2' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>',
+                     //url with escaped params
                      'URL: www.moodle.org/s/i=1&amp;j=2' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>',
+                     //https url with params
                      'URL: https://moodle.org/s/i=1&j=2' => 'URL: <a href="https://moodle.org/s/i=1&j=2" class="_blanktarget">https://moodle.org/s/i=1&j=2</a>',
+                     //url with port and params
                      'URL: http://moodle.org:8080/s/i=1' => 'URL: <a href="http://moodle.org:8080/s/i=1" class="_blanktarget">http://moodle.org:8080/s/i=1</a>',
-                     'http://moodle.org - URL' => '<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> - URL',
-                     'www.moodle.org - URL' => '<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> - URL',
+                     //url in brackets
                      '(http://moodle.org) - URL' => '(<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>) - URL',
                      '(www.moodle.org) - URL' => '(<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>) - URL',
+                     //url in square brackets
                      '[http://moodle.org] - URL' => '[<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>] - URL',
                      '[www.moodle.org] - URL' => '[<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>] - URL',
+                     //url in brackets with anchor
                      '[http://moodle.org/main#anchor] - URL' => '[<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>] - URL',
                      '[www.moodle.org/main#anchor] - URL' => '[<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>] - URL',
+                     //brackets within the url
                      'URL: http://cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://cc.org/url_(withpar)_go/?i=2" class="_blanktarget">http://cc.org/url_(withpar)_go/?i=2</a>',
                      'URL: www.cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(withpar)_go/?i=2" class="_blanktarget">www.cc.org/url_(withpar)_go/?i=2</a>',
                      'URL: http://cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">http://cc.org/url_(with)_(par)_go/?i=2</a>',
                      'URL: www.cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">www.cc.org/url_(with)_(par)_go/?i=2</a>',
+                     'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>',
+                     'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29</a> - URL',
+                     'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/(#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/(#Parentheses_.28_.29</a> - URL',
+                     //escaped brackets in url
+                     'http://en.wikipedia.org/wiki/Slash_%28punctuation%29'=>'<a href="http://en.wikipedia.org/wiki/Slash_%28punctuation%29" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_%28punctuation%29</a>',
+                     //anchor tag
                      'URL: <a href="http://moodle.org">http://moodle.org</a>' => 'URL: <a href="http://moodle.org">http://moodle.org</a>',
                      'URL: <a href="http://moodle.org">www.moodle.org</a>' => 'URL: <a href="http://moodle.org">www.moodle.org</a>',
                      'URL: <a href="http://moodle.org"> http://moodle.org</a>' => 'URL: <a href="http://moodle.org"> http://moodle.org</a>',
                      'URL: <a href="http://moodle.org"> www.moodle.org</a>' => 'URL: <a href="http://moodle.org"> www.moodle.org</a>',
+                     //escaped anchor tag
+                     htmlspecialchars('escaped anchor tag <a href="http://moodle.org">www.moodle.org</a>') => 'escaped anchor tag &lt;a href="http://moodle.org"&gt; www.moodle.org&lt;/a&gt;',
+                     //trailing fullstop
                      'URL: http://moodle.org/s/i=1&j=2.' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>.',
                      'URL: www.moodle.org/s/i=1&amp;j=2.' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>.',
+                     //trailing unmatched bracket
                      'URL: http://moodle.org)<br />' => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>)<br />',
+                     //partially escaped html
                      'URL: <p>text www.moodle.org&lt;/p> text' => 'URL: <p>text <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>&lt;/p> text',
+                     //decimal url parameter
                      'URL: www.moodle.org?u=1.23' => 'URL: <a href="http://www.moodle.org?u=1.23" class="_blanktarget">www.moodle.org?u=1.23</a>',
+                     //escaped space in url
                      'URL: www.moodle.org?u=test+param&' => 'URL: <a href="http://www.moodle.org?u=test+param&" class="_blanktarget">www.moodle.org?u=test+param&</a>',
+                     //odd characters in url param
                      'URL: www.moodle.org?param=:)' => 'URL: <a href="http://www.moodle.org?param=:)" class="_blanktarget">www.moodle.org?param=:)</a>',
+                     //multiple urls
                      'URL: http://moodle.org www.moodle.org'
                      => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>',
+                     //containing anchor tags including a class parameter and a url to convert
                      'URL: <a href="http://moodle.org">http://moodle.org</a> www.moodle.org <a class="customclass" href="http://moodle.org">http://moodle.org</a>'
                      => 'URL: <a href="http://moodle.org">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> <a class="customclass" href="http://moodle.org">http://moodle.org</a>',
+                     //subdomain
                      'http://subdomain.moodle.org - URL' => '<a href="http://subdomain.moodle.org" class="_blanktarget">http://subdomain.moodle.org</a> - URL',
+                     //multiple subdomains
                      'http://subdomain.subdomain.moodle.org - URL' => '<a href="http://subdomain.subdomain.moodle.org" class="_blanktarget">http://subdomain.subdomain.moodle.org</a> - URL',
+                     //looks almost like a link but isnt
                      'This contains http, http:// and www but no actual links.'=>'This contains http, http:// and www but no actual links.',
+                     //no link at all
                      'This is a story about moodle.coming to a cinema near you.'=>'This is a story about moodle.coming to a cinema near you.',
-                     'http://en.wikipedia.org/wiki/Slash_%28punctuation%29'=>'<a href="http://en.wikipedia.org/wiki/Slash_%28punctuation%29" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_%28punctuation%29</a>',
-                     'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>',
-                     'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29</a> - URL',
-                     'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/(#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/(#Parentheses_.28_.29</a> - URL',
+                     //utf 8 characters
                      'http://Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">http://Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
                      'www.Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://www.Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">www.Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
-                     'moodle.org' => 'moodle.org',//too hard to identify without additional regexs
+                     //too hard to identify without additional regexs
+                     'moodle.org' => 'moodle.org',
+                     //some text with no link between related html tags
+                     '<b>no link here</b>' => '<b>no link here</b>',
+                     //some text with a link between related html tags
+                     '<b>a link here www.moodle.org</b>' => '<b>a link here <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a></b>',
+                     //some text containing a link within unrelated tags
+                     '<br />This is some text. www.moodle.com then some more text<br />' => '<br />This is some text. <a href="http://www.moodle.com" class="_blanktarget">www.moodle.com</a> then some more text<br />',
+                     //check we aren't modifying img tags
+                     'image<img src="http://moodle.org/logo/logo-240x60.gif" />' => 'image<img src="http://moodle.org/logo/logo-240x60.gif" />',
+                     //partially escaped img tag
+                     'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />' => 'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />',
+                     //fully escaped img tag
+                     htmlspecialchars('fully escaped img tag <img src="http://moodle.org/logo/logo-240x60.gif" />') => 'fully escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" /&gt;',
                  );
        foreach ($texts as $text => $correctresult) {
             if(mb_detect_encoding($text)=='UTF-8') {
@@ -232,6 +271,7 @@ END;
             $this->assertEqual($text, $correctresult, $msg);
         }
 
+        //performance testing
         $reps = 1000;
 
         $time_start = microtime(true);
index 3df5258c44c60c30ef9636f13719be9622438f86..b99b5e141343ae5b786715997943d667a61d0cee 100644 (file)
@@ -1720,10 +1720,16 @@ function html_to_text($html) {
  * @param string $text Passed in by reference. The string to be searched for urls.
  */
 function convert_urls_into_links(&$text) {
-    $filterignoretagsopen  = array('<a\s[^>]+?>');
-    $filterignoretagsclose = array('</a>');
+    //I've added img tags to this list of tags to ignore.
+    //See MDL-21168 for more info. A better way to ignore tags whether or not
+    //they are escaped partially or completely would be desirable. For example:
+    //<a href="blah">
+    //&lt;a href="blah"&gt;
+    //&lt;a href="blah">
+    $filterignoretagsopen  = array('<a\s[^>]+?>', '<img\s[^>]+?>');
+    $filterignoretagsclose = array('</a>','');
     filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
-    
+
     // Check if we support unicode modifiers in regular expressions. Cache it.
     // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
     // chars are going to arrive to URLs officially really soon (2010?)