MDL-12399, shorten_text() truncates all closing tags

author toyomoyo <toyomoyo>

Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)

committer toyomoyo <toyomoyo>

Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)
author toyomoyo <toyomoyo>
Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)
committer toyomoyo <toyomoyo>
Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)
diff --git a/lib/moodlelib.php b/lib/moodlelib.php

index afad8738dd6c7c1b01d25c467e5feddf1a87c67a..30be5066f8b1bb8e7eefd3682e3c29b5d7926bee 100644 (file)
--- a/lib/moodlelib.php
+++ b/lib/moodlelib.php
@@ -6345,62 +6345,114 @@ function random_string ($length=15) {
  /*
   * Given some text (which may contain HTML) and an ideal length,
   * this function truncates the text neatly on a word boundary if possible
+ * @param string $text - text to be shortened
+ * @param int $ideal - ideal string length
+ * @param boolean $exact if false, $text will not be cut mid-word
+ * @return string $truncate - shortened string 
   */
-function shorten_text($text, $ideal=30) {
  
-   global $CFG;
+function shorten_text($text, $ideal=30, $exact = false) {
  
+    global $CFG;
+    $ending = '...';
+
+    // if the plain text is shorter than the maximum length, return the whole text
+    if (strlen(preg_replace('/<.*?>/', '', $text)) <= $ideal) {
+        return $text;
+    }
+            
+    // splits all html-tags to scannable lines
+    preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
+
+    $total_length = strlen($ending);
+    $open_tags = array();
+    $truncate = '';
+
+    foreach ($lines as $line_matchings) {
+        // if there is any html-tag in this line, handle it and add it (uncounted) to the output
+        if (!empty($line_matchings[1])) {
+            // if it's an "empty element" with or without xhtml-conform closing slash (f.e. <br/>)
+            if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) {
+                    // do nothing
+            // if tag is a closing tag (f.e. </b>)
+            } else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
+                // delete tag from $open_tags list
+                $pos = array_search($tag_matchings[1], array_reverse($open_tags, true)); // can have multiple exact same open tags, close the last one
+                if ($pos !== false) {
+                    unset($open_tags[$pos]);
+                }
+            // if tag is an opening tag (f.e. <b>)
+            } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
+                // add tag to the beginning of $open_tags list
+                array_unshift($open_tags, strtolower($tag_matchings[1]));
+            }
+            // add html-tag to $truncate'd text
+            $truncate .= $line_matchings[1];
+        }
+
+        // calculate the length of the plain text part of the line; handle entities as one character
+        $content_length = strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $line_matchings[2]));
+        if ($total_length+$content_length > $ideal) {
+            // the number of characters which are left
+            $left = $ideal - $total_length;
+            $entities_length = 0;
+            // search for html entities
+            if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
+                // calculate the real length of all entities in the legal range
+                foreach ($entities[0] as $entity) {
+                    if ($entity[1]+1-$entities_length <= $left) {
+                        $left--;
+                        $entities_length += strlen($entity[0]);
+                    } else {
+                        // no more characters left
+                        break;
+                    }
+                }
+            }
+            $truncate .= substr($line_matchings[2], 0, $left+$entities_length);
+            // maximum length is reached, so leave the loop
+            break;
+        } else {
+            $truncate .= $line_matchings[2];
+            $total_length += $content_length;
+        }
+                
+        // if the maximum length is reached, leave the loop
+        if($total_length >= $ideal) {
+            break;
+        }
+    }
  
-   $i = 0;
-   $tag = false;
-   $length = strlen($text);
-   $count = 0;
-   $stopzone = false;
-   $truncate = 0;
-
-   if ($length <= $ideal) {
-       return $text;
-   }
-
-   for ($i=0; $i<$length; $i++) {
-       $char = $text[$i];
-
-       switch ($char) {
-           case "<":
-               $tag = true;
-               break;
-           case ">":
-               $tag = false;
-               break;
-           default:
-               if (!$tag) {
-                   if ($stopzone) {
-                       if ($char == '.' or $char == ' ') {
-                           $truncate = $i+1;
-                           break 2;
-                       } else if (ord($char) >= 0xE0) {  // Chinese/Japanese/Korean text
-                           $truncate = $i;               // can be truncated at any UTF-8
-                           break 2;                      // character boundary.
-                       }
-                   }
-                   $count++;
-               }
-               break;
-       }
-       if (!$stopzone) {
-           if ($count > $ideal) {
-               $stopzone = true;
-           }
-       }
-   }
+    // if the words shouldn't be cut in the middle...
+    if (!$exact) {
+        // ...search for the last occurrence of a space or period...
+               for ($k=strlen($truncate);$k>0;$k--) {
+            if (!empty($truncate[$k]) && ($char = $truncate[$k])) {
+                if ($char == '.' or $char == ' ') {
+                    $breakpos = $k+1;
+                    break;
+                } else if (ord($char) >= 0xE0) {  // Chinese/Japanese/Korean text
+                    $breakpos = $k;               // can be truncated at any UTF-8
+                    break;                        // character boundary.
+                }
+            }
+               }
+        
+               if (isset($breakpos)) {
+            // ...and cut the text in this position
+            $truncate = substr($truncate, 0, $breakpos);
+               }
+       }
  
-   if (!$truncate) {
-       $truncate = $i;
-   }
+    // add the defined ending to the text
+       $truncate .= $ending;
  
-   $ellipse = ($truncate < $length) ? '...' : '';
+    // close all unclosed html-tags
+    foreach ($open_tags as $tag) {
+        $truncate .= '</' . $tag . '>';
+    }
  
-   return substr($text, 0, $truncate).$ellipse;
+       return $truncate;
  }
author	toyomoyo <toyomoyo>
	Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)
committer	toyomoyo <toyomoyo>
	Tue, 4 Dec 2007 05:10:12 +0000 (05:10 +0000)