HotPot: refine detection of media urls in escaped javascript and <param> tags
author gbateson <gbateson>
Fri, 16 Oct 2009 05:35:51 +0000 (05:35 +0000)
committer gbateson <gbateson>
Fri, 16 Oct 2009 05:35:51 +0000 (05:35 +0000)
mod/hotpot/lib.php

index 5e2ec5b573ff3ff772ee22fa381ea22f3b25c642..53316c7ec56b0deb69e8f3f38f1e1bd7f8e0deb7 100644 (file)
@@ -2199,44 +2199,46 @@ class hotpot_xml_quiz extends hotpot_xml_tree {
             $quote = '["'."']?"; // single, double, or no quote
 
             // patterns to media files types and paths
-            $filetype = "avi|mpeg|mpg|mp3|mov|wmv|flv";
+            $filetypes = "avi|mpeg|mpg|mp3|mov|wmv|flv";
             if ($CFG->filter_mediaplugin_enable_swf) {
-                $filetype .= '|swf';
+                $filetypes .= '|swf';
             }
-            $filepath = ".*?\.($filetype)";
+            $filepath = '[^"'."']*".'\\.(?:'.$filetypes.')[^"'."']*";
 
             $tagopen = '(?:(<)|(\\\\u003C))'; // left angle-bracket (uses two parenthese)
+            $tagchars = '(?(1)[^>]|(?(2).(?!\\\\u003E)))*?';  // string of chars inside the tag
             $tagclose = '(?(1)>|(?(2)\\\\u003E))'; // right angle-bracket (to match the left one)
             $tagreopen = '(?(1)<|(?(2)\\\\u003C))'; // another left angle-bracket (to match the first one)
 
-            // pattern to match <PARAM> tags which contain the file path
+            // pattern to match <param> tags which contain the file path
+            $param_names = 'movie|src|url|flashvars';
             //  wmp        : url
             //  quicktime  : src
             //  realplayer : src
-            //  flash      : movie (doesn't need replacing)
-            $param_url = "/{$tagopen}param{$space}name=$quote(?:movie|src|url)$quote{$space}value=$quote($filepath)$quote.*?$tagclose/is";
+            //  flash      : movie, flashvars
+            $param_url = '/'.$tagopen.'param'.'\s'.$tagchars.'name="(?:'.$param_names.')"'.$tagchars.'value="('.$filepath.')"'.$tagchars.$tagclose.'/is';
 
             // pattern to match <a> tags which link to multimedia files
-            $link_url = "/{$tagopen}a{$space}href=$quote($filepath)$quote.*?$tagclose.*?$tagreopen\/a$tagclose/is";
+            $link_url = '/'.$tagopen.'a'.'\s'.$tagchars.'href="('.$filepath.')"'.$tagchars.$tagclose.'.*?'.$tagreopen.'\/a'.$tagclose.'/is';
 
             // extract <object> tags
-            preg_match_all("/{$tagopen}object.*?{$tagclose}(.*?)(?:{$tagreopen}\/object{$tagclose})+/is", $this->html, $objects);
+            $object_tag = '/'.$tagopen.'object'.'\s'.$tagchars.$tagclose.'(.*?)'.'(?:'.$tagreopen.'\/object'.$tagclose.')+/is';
+            preg_match_all($object_tag, $this->html, $objects);
 
             $i_max = count($objects[0]);
             for ($i=0; $i<$i_max; $i++) {
 
-                // extract URL from <PARAM> or <A> 
+                // extract URL from <param> or <a>
                 $url = '';
                 if (preg_match($param_url, $objects[3][$i], $matches) || preg_match($link_url, $objects[3][$i], $matches)) {
                     $url = $matches[3];
                 }
-
                 if ($url) {
                     // strip inner tags (e.g. <embed>)
                     $txt = preg_replace("/$tagopen.*?$tagclose/", '', $objects[3][$i]);
 
                     // if url is in the query string, remove the leading characters
-                    $url = preg_replace('/^[^?]*\?([^=]+=[^&]*&)*[^=]+=([^&]*)$/', '$2', $url, 1);
+                    $url = preg_replace('/^([^=]+=[^&]*&)*[^=]+=(http:[^&]*)$/', '$2', $url, 1);
                     $link = '<a href="'.$url.'">'.$txt.'</a>';
 
                     $new_object = hotpot_mediaplayer_moodle($this, $link);