From b4cf937102ba3117255c1920c4d364274b39d481 Mon Sep 17 00:00:00 2001 From: tjhunt Date: Wed, 1 Apr 2009 00:39:17 +0000 Subject: [PATCH] translation: MDL-18766 Prototype automatic translation system for Moodle. If Google can do something like http://translate.google.com/, surely we can do something just as good in Moodle. Here is a first attempt. It is a bit rough around the edges, and only a couple of target languages have been implemented so far. (More and better attempts welcome!) In order to ensure that this gets adequate testing, I have made sure that it gets turned on by default. However, if you really don't like it, you can turn it off under Administration > Development > Experimental settings. --- admin/index.php | 1 + admin/settings/development.php | 1 + lang/en_utf8/autotranslate.php | 8 + lib/autotranslatelib.php | 218 ++++++++++++++++++++++++ lib/moodlelib.php | 3 + lib/setup.php | 1 + lib/simpletest/testautotranslatelib.php | 132 ++++++++++++++ lib/weblib.php | 20 +-- 8 files changed, 372 insertions(+), 12 deletions(-) create mode 100644 lang/en_utf8/autotranslate.php create mode 100644 lib/autotranslatelib.php create mode 100644 lib/simpletest/testautotranslatelib.php diff --git a/admin/index.php b/admin/index.php index fa2e7632f8..1c67f01ef9 100644 --- a/admin/index.php +++ b/admin/index.php @@ -323,6 +323,7 @@ if (empty($CFG->rolesactive)) { set_config('rolesactive', 1); set_config('adminsetuppending', 1); + unset_config('autotranslatetolang'); // we neeed this redirect to setup proper session upgrade_finished("index.php?sessionstarted=1&lang=$CFG->lang"); } diff --git a/admin/settings/development.php b/admin/settings/development.php index 484cf51a07..e6165027cb 100644 --- a/admin/settings/development.php +++ b/admin/settings/development.php @@ -14,6 +14,7 @@ if ($hassiteconfig) { // speedup for non-admins, add all caps used on this page $item->set_updatedcallback('reset_text_filters_cache'); $temp->add($item); $temp->add(new 
admin_setting_configcheckbox('experimentalsplitrestore', get_string('experimentalsplitrestore', 'admin'), get_string('configexperimentalsplitrestore', 'admin'), 0)); + $temp->add(new admin_setting_configselect('autotranslatetolang', get_string('autotranslate', 'autotranslate'), get_string('configautotranslate', 'autotranslate'), 'null', auto_translate_target_languages())); $ADMIN->add('experimental', $temp); diff --git a/lang/en_utf8/autotranslate.php b/lang/en_utf8/autotranslate.php new file mode 100644 index 0000000000..a8838a92bb --- /dev/null +++ b/lang/en_utf8/autotranslate.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/lib/autotranslatelib.php b/lib/autotranslatelib.php new file mode 100644 index 0000000000..fa1b6796b1 --- /dev/null +++ b/lib/autotranslatelib.php @@ -0,0 +1,218 @@ +autotranslatetolang)) { + if (empty($CFG->rolesactive)) { + $CFG->autotranslatetolang = 'null'; + } else { + $langs = array_keys(auto_translate_target_languages()); + array_shift($langs); + $lang = $langs[mt_rand(0, count($langs) - 1)]; + set_config('autotranslatetolang', $lang); + } + } + $translator = translator_factory::instance()->get_translator($CFG->autotranslatetolang); + return $translator->translate_content($content); +} + +function is_untranslatable_string($identifier, $module) { + global $CFG; + return $module == 'autotranslate' || $module == 'langconfig' || empty($CFG->rolesactive); +} + +/** + * Get a list of languages we know how to automatically translate into. + * @return array language code => human readable name. + */ +function auto_translate_target_languages() { + static $list = null; + if (!is_null($list)) { + return $list; + } + $codes = array('null', 'cs_ps', 'en_nz_pl'); + $list = array(); + foreach ($codes as $code) { + $name = get_string('targetlang_' . 
$code, 'autotranslate'); + if (substr($name, 0, 2) != '[[') { + $name = base64_decode($name); + } + $list[$code] = $name; + } + return $list; +} + +/** + * Singleton class that gets the right auto_translator for a target language. + */ +class translator_factory { + private static $instance = null; + private $translators = array(); + + protected function __constructor() { + } + + public static function instance() { + if (is_null(self::$instance)) { + self::$instance = new translator_factory(); + } + return self::$instance; + } + + public static function get_translator($lang) { + if (empty($lang)) { + $lang = 'null'; + } + if (isset($translators[$lang])) { + return $translators[$lang]; + } + $classname = $lang . '_auto_translator'; + if (strpos(print_backtrace(debug_backtrace(), true), 'database') !== false || + strpos(print_backtrace(debug_backtrace(), true), 'print_error') !== false) { + $classname = 'null_auto_translator'; + } + if (!class_exists($classname)) { + throw new moodle_exception(); + } + $translators[$lang] = new $classname; + return $translators[$lang]; + } +} + +interface auto_translator { + public function translate_content($content); +} + +class null_auto_translator implements auto_translator { + public function translate_content($content) { + return $content; + } +} + +abstract class word_by_word_translator implements auto_translator { + public function translate_content($content) { + $parsedcontent = $this->split_text_and_tags($content); + foreach ($parsedcontent as $key => $item) { + if ($item->type == 'text') { + $parsedcontent[$key]->content = $this->translate_text($item->content); + } + } + return $this->join_content($parsedcontent); + } + + protected function split_text_and_tags($content) { + $bits = preg_split('/((?:<[^#%*>][^>]*>|&\w+;|&#\d+;|&#[xX][0-9a-fA-F]+;)+)/', $content, -1, PREG_SPLIT_DELIM_CAPTURE); + $parsedcontent = array(); + foreach ($bits as $index => $bit) { + $item = new stdClass; + $item->content = $bit; + if ($index % 2) { 
+ $item->type = 'tag'; + } else { + $item->type = 'text'; + } + $parsedcontent[] = $item; + } + return $parsedcontent; + } + + protected function translate_text($text) { + $wordsandbreaks = preg_split('/\b/', $text); + foreach ($wordsandbreaks as $index => $word) { + if (preg_match('/\w+/', $word)) { + $wordsandbreaks[$index] = $this->translate_word($word); + } + } + return implode('', $wordsandbreaks); + } + + protected function join_content($content) { + $out = ''; + foreach ($content as $item) { + $out .= $item->content; + } + return $out; + } + + abstract protected function translate_word($word); +} + +class reverse_auto_translator extends word_by_word_translator { + protected function translate_word($word) { + return strrev($word); + } +} + +class cs_ps_auto_translator extends word_by_word_translator { + protected function translate_word($word) { + $len = strlen($word); + if ($len == 0) { + return ''; + } + $newword = chr(71); + if ($len >= 2) { + $end = round(($len - 2) / 5); + $newword .= str_repeat(chr(114), $len - $end - 1); + $newword .= str_repeat(chr(33), $end); + } + return $newword; + } +} + +class en_nz_pl_auto_translator extends word_by_word_translator { + private $library = null; + private $librarylen; + private function ensure_library_loaded() { + if (is_null($this->library)) { + $this->library = unserialize(base64_decode( + 'YTo5OntpOjA7czozOiJjYXQiO2k6MTtzOjQ6InBvbnkiO2k6MjtzOjQ6InJh' . + 'Z2UiO2k6MztzOjU6Im5pbmphIjtpOjQ7czo1OiJhbmdyeSI7aTo1O3M6Njoi' . + 'ZmllcmNlIjtpOjY7czo2OiJjb2ZmZWUiO2k6NztzOjc6ImNhZmZpbmUiO2k6' . 
+ 'ODtzOjY6Im1haGFyYSI7fQ==')); + $this->librarylen = count($this->library); + } + } + public function translate_word($word) { + $len = strlen($word); + if ($len == 0) { + return ''; + } + $this->ensure_library_loaded(); + return $this->library[($len - 1) % $this->librarylen]; + } +} diff --git a/lib/moodlelib.php b/lib/moodlelib.php index 0869278036..3b061be620 100644 --- a/lib/moodlelib.php +++ b/lib/moodlelib.php @@ -5535,6 +5535,9 @@ class string_manager { foreach (array('_local', '') as $suffix) { $file = $location . $lang . $suffix . '/' . $module . '.php'; if ($result = $this->get_string_from_file($identifier, $file, $a)) { + if (!is_untranslatable_string($identifier, $module)) { + $result = auto_translate_content($result); + } return $result; } } diff --git a/lib/setup.php b/lib/setup.php index 7eac4a0d55..471930add0 100644 --- a/lib/setup.php +++ b/lib/setup.php @@ -158,6 +158,7 @@ global $SCRIPT; require_once($CFG->libdir .'/eventslib.php'); // Events functions require_once($CFG->libdir .'/grouplib.php'); // Groups functions require_once($CFG->libdir .'/sessionlib.php'); // All session and cookie related stuff + require_once($CFG->libdir .'/autotranslatelib.php');// Other general-purpose functions //point pear include path to moodles lib/pear so that includes and requires will search there for files before anywhere else //the problem is that we need specific version of quickforms and hacked excel files :-( diff --git a/lib/simpletest/testautotranslatelib.php b/lib/simpletest/testautotranslatelib.php new file mode 100644 index 0000000000..e4d7f5d3e7 --- /dev/null +++ b/lib/simpletest/testautotranslatelib.php @@ -0,0 +1,132 @@ +libdir . 
'/autotranslatelib.php'); + +class test_null_auto_translator extends UnitTestCase { + public function test_translate_content() { + $translator = new null_auto_translator; + $some_content = 'some content'; + $this->assertEqual($translator->translate_content($some_content), $some_content); + } +} + +class testable_word_by_word_translator extends word_by_word_translator { + public function split_text_and_tags($content) { + return parent::split_text_and_tags($content); + } + public function translate_text($text) { + return parent::translate_text($text); + } + public function join_content($content) { + return parent::join_content($content); + } + public function translate_word($word) { + return 'word'; + } +} + +class test_word_by_word_translator extends UnitTestCase { + private $wwt; + + public function setUp() { + $this->wwt = new testable_word_by_word_translator(); + } + + public function test_split_text_and_tags_simple() { + $parsedcontent = $this->wwt->split_text_and_tags('Some text.'); + $expected = array( + (object) array('content' => 'Some text.', 'type' => 'text'), + ); + $this->assertEqual($expected, $parsedcontent); + } + + public function test_split_text_and_tags_entity_uc() { + $parsedcontent = $this->wwt->split_text_and_tags('Hiઠworld!'); + $expected = array( + (object) array('content' => 'Hi', 'type' => 'text'), + (object) array('content' => 'ઠ', 'type' => 'tag'), + (object) array('content' => 'world!', 'type' => 'text'), + ); + $this->assertEqual($expected, $parsedcontent); + } + + public function test_split_text_and_tags_complex_html() { + $parsedcontent = $this->wwt->split_text_and_tags('
This & that '); + $expected = array( + (object) array('content' => '', 'type' => 'text'), + (object) array('content' => '
', 'type' => 'tag'), + (object) array('content' => 'This ', 'type' => 'text'), + (object) array('content' => '&', 'type' => 'tag'), + (object) array('content' => ' ', 'type' => 'text'), + (object) array('content' => '', 'type' => 'tag'), + (object) array('content' => 'that', 'type' => 'text'), + (object) array('content' => ' ', 'type' => 'tag'), + (object) array('content' => '', 'type' => 'text'), + ); + $this->assertEqual($expected, $parsedcontent); + } + + public function test_translate_text() { + $this->assertEqual('word *word* word word (word) word!', + $this->wwt->translate_text('This *is* some text (rough) content!')); + } + + public function test_translate_text_empty() { + $this->assertEqual('', $this->wwt->translate_text('')); + } + + public function test_join_content() { + $this->assertEqual('Test <->', $this->wwt->join_content(array( + (object) array('content' => 'Tes'), + (object) array('content' => 't <'), + (object) array('content' => '->'), + ))); + } +} + +class test_reverse_auto_translator extends UnitTestCase { + private $translator; + + public function setUp() { + $this->translator = new reverse_auto_translator(); + } + + public function test_translate_content() { + $this->assertEqual('
sihT & taht ', + $this->translator->translate_content('
This & that ')); + } +} diff --git a/lib/weblib.php b/lib/weblib.php index ce82070c59..3fdc880c4e 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -113,21 +113,16 @@ $ALLOWED_PROTOCOLS = array('http', 'https', 'ftp', 'news', 'mailto', 'rtsp', 'te * This function is very similar to {@link p()} * * @param string $var the string potentially containing HTML characters - * @param boolean $strip to decide if we want to strip slashes or no. Default to false. - * true should be used to print data from forms and false for data from DB. + * @param boolean $obsolete no longer used. * @return string */ -function s($var, $strip=false) { +function s($var, $obsolete = false) { if ($var == '0') { // for integer 0, boolean false, string '0' return '0'; } - if ($strip) { - return preg_replace("/&(#\d+);/i", "&$1;", htmlspecialchars($var)); - } else { - return preg_replace("/&(#\d+);/i", "&$1;", htmlspecialchars($var)); - } + return auto_translate_content(preg_replace("/&(#\d+);/i", "&$1;", htmlspecialchars($var))); } /** @@ -137,12 +132,11 @@ function s($var, $strip=false) { * This function is very similar to {@link s()} * * @param string $var the string potentially containing HTML characters - * @param boolean $strip to decide if we want to strip slashes or no. Default to false. - * true should be used to print data from forms and false for data from DB. + * @param boolean $obsolete no longer used. 
* @return string */ -function p($var, $strip=false) { - echo s($var, $strip); +function p($var, $obsolete = false) { + echo s($var, $obsolete); } /** @@ -1378,6 +1372,7 @@ function format_text($text, $format=FORMAT_MOODLE, $options=NULL, $courseid=NULL } break; } + $text = auto_translate_content($text); if (empty($options->nocache) and !empty($CFG->cachetext) and $CFG->currenttextiscacheable) { if (CLI_SCRIPT) { @@ -1506,6 +1501,7 @@ function format_string ($string, $striplinks=true, $courseid=NULL ) { } $string = clean_text($string); } + $string = auto_translate_content($string); //Store to cache $strcache[$md5] = $string; -- 2.39.5