/**
 * Converts numeric entities (&#nnnn; or &#xhhhh;) and, optionally, the
 * named HTML entities known to PHP into their UTF-8 characters.
 *
 * Based on the snippet by laurynas dot butkus at gmail at:
 * http://php.net/manual/en/function.html-entity-decode.php#75153
 * History: added by stronk7 on 2007-06-13 (commit 4e2cb0e3), merged from
 * MOODLE_18_STABLE.
 *
 * The whole string is decoded in ONE pass, so text produced by decoding
 * one entity is never decoded again ("&#38;lt;" becomes "&lt;", not "<").
 * Numeric entities whose value textlib::code2utf8() rejects are replaced
 * by whatever that method returns for them.
 *
 * No eval()-style regular expression (the /e modifier) is used: the
 * digits captured from the input are converted with hexdec()/numeric
 * casting, never executed as PHP code, so decimal entities with leading
 * zeros ("&#0065;") are read as decimal rather than octal.
 *
 * NOTE: typo3's entities_to_utf8() was considered by the original author
 * and rejected for speed/memory reasons; this implementation has not
 * been re-benchmarked against it.
 *
 * @param string $str input string
 * @param boolean $htmlent convert also html entities (defaults to true)
 * @return string the input with the recognised entities replaced by UTF-8
 */
function entities_to_utf8($str, $htmlent=true) {

    static $trans_tbl; /// Named entity => UTF-8 table, built on first use

    $replacements = array(); /// entity text => UTF-8 bytes, for this call

/// Collect every distinct numeric entity present in the input
    if (preg_match_all('~&#(x[0-9a-f]+|[0-9]+);~i', $str, $matches, PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            $entity = $match[0];
            if (isset($replacements[$entity])) {
                continue; /// Already resolved this exact entity text
            }
            $digits = $match[1];
            if ($digits[0] === 'x' || $digits[0] === 'X') {
                $code = hexdec(substr($digits, 1));
            } else {
                $code = $digits + 0; /// Decimal; leading zeros are harmless
            }
            $replacements[$entity] = textlib::code2utf8($code);
        }
    }

/// Add the literal (named) entities, if desired
    if ($htmlent) {
    /// Generate/create $trans_tbl
        if (!isset($trans_tbl)) {
            $trans_tbl = array();
            foreach (get_html_translation_table(HTML_ENTITIES) as $char => $entity) {
            /// A one-byte value is taken to be ISO-8859-1, where the byte value
            /// equals the Unicode code point. A longer value is assumed to be
            /// UTF-8 already (newer PHP versions) and is kept untouched.
                if (strlen($char) == 1) {
                    $trans_tbl[$entity] = textlib::code2utf8(ord($char));
                } else {
                    $trans_tbl[$entity] = $char;
                }
            }
        }
        $replacements = $replacements + $trans_tbl;
    }

/// Nothing to translate, return the input untouched
    if (empty($replacements)) {
        return $str;
    }

/// Return utf8-ised string (strtr() with an array never re-scans its output)
    return strtr($str, $replacements);
}
/**
 * Returns the utf8 string corresponding to the unicode value
 * (from php.net, courtesy - romans@void.lv)
 *
 * Only Unicode scalar values are encoded: 0 to 0x10FFFF, excluding the
 * UTF-16 surrogate range 0xD800 to 0xDFFF. Anything else (negative
 * numbers, surrogates, values above 0x10FFFF) yields an empty string,
 * because encoding it would produce ill-formed UTF-8.
 *
 * @param int $num one unicode value
 * @return string the UTF-8 char corresponding to the unicode value,
 *                or '' when $num is not an encodable code point
 */
function code2utf8($num) {
/// Reject values outside the Unicode code space before any bit shifting
    if ($num < 0 || $num > 0x10FFFF) {
        return '';
    }
    $num = (int) $num; /// hexdec() and friends may hand us a float

/// Surrogates are reserved for UTF-16 and have no valid UTF-8 form
    if ($num >= 0xD800 && $num <= 0xDFFF) {
        return '';
    }

    if ($num < 128) {
    /// 1 byte: 0xxxxxxx
        return chr($num);
    }
    if ($num < 2048) {
    /// 2 bytes: 110xxxxx 10xxxxxx
        return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
    }
    if ($num < 65536) {
    /// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
    }
/// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}