From: garvinhicking Date: Fri, 27 Oct 2006 09:18:19 +0000 (+0000) Subject: Fix import UTF-8 bug, thanks to jan X-Git-Tag: 1.1~58 X-Git-Url: http://git.mjollnir.org/gw?a=commitdiff_plain;h=b7660ccc4f7da489d492cc2cbe2dc8f884d32402;p=s9y.git Fix import UTF-8 bug, thanks to jan --- diff --git a/docs/NEWS b/docs/NEWS index e64f1bf..0ff5e0d 100644 --- a/docs/NEWS +++ b/docs/NEWS @@ -3,6 +3,10 @@ Version 1.1 () ------------------------------------------------------------------------ + * Fixed bug that prevented native imports from other blog systems + from recoding ISO charsets into UTF-8. Major thanks to Jan of + blog.salid.de. (garvinhicking) + * Added functionality to reply to comments in the admin interface (garvinhicking) diff --git a/include/admin/entries.inc.php b/include/admin/entries.inc.php index e7d0610..e7098cb 100644 --- a/include/admin/entries.inc.php +++ b/include/admin/entries.inc.php @@ -309,7 +309,7 @@ function serendipity_drawList() { - + diff --git a/include/admin/import.inc.php b/include/admin/import.inc.php index 1c69768..bbe6240 100644 --- a/include/admin/import.inc.php +++ b/include/admin/import.inc.php @@ -17,8 +17,8 @@ if (function_exists('set_time_limit')) { /* Class construct. Each importer plugin must extend this class. */ class Serendipity_Import { - var $trans_table = ''; - + var $trans_table = ''; + var $force_recode = true; /** * Return textual notes of an importer plugin * @@ -68,8 +68,8 @@ class Serendipity_Import { * @return string converted string */ function &decode($string) { - // xml_parser_* functions to recoding from ISO-8859-1/UTF-8 - if (LANG_CHARSET == 'ISO-8859-1' || LANG_CHARSET == 'UTF-8') { + // xml_parser_* functions do recoding from ISO-8859-1/UTF-8 + if (!$this->force_recode && (LANG_CHARSET == 'ISO-8859-1' || LANG_CHARSET == 'UTF-8')) { return $string; } @@ -84,6 +84,8 @@ class Serendipity_Import { $out = iconv('ISO-8859-1', LANG_CHARSET, $string); } elseif (function_exists('recode')) { $out = recode('iso-8859-1..' . 
LANG_CHARSET, $string); + } elseif (LANG_CHARSET == 'UTF-8') { + return utf8_encode($string); } else { return $string; } diff --git a/include/admin/importers/generic.inc.php b/include/admin/importers/generic.inc.php index 66b6547..fe38f21 100644 --- a/include/admin/importers/generic.inc.php +++ b/include/admin/importers/generic.inc.php @@ -8,6 +8,7 @@ class Serendipity_Import_Generic extends Serendipity_Import { var $info = array('software' => IMPORT_GENERIC_RSS); var $data = array(); var $inputFields = array(); + var $force_recode = false; function Serendipity_Import_Generic($data) { $this->data = $data; diff --git a/include/admin/importers/livejournal.inc.php b/include/admin/importers/livejournal.inc.php new file mode 100644 index 0000000..daa9cce --- /dev/null +++ b/include/admin/importers/livejournal.inc.php @@ -0,0 +1,216 @@ + 'LiveJournal XML'); + var $data = array(); + var $inputFields = array(); + var $force_recode = false; + + function Serendipity_Import_LiveJournalXML($data) { + global $serendipity; + $this->data = $data; + $this->inputFields = array(array('text' => 'LiveJournal XML', + 'type' => 'input', + 'name' => 'url', + 'default' => $serendipity['serendipityPath'] . $serendipity['uploadPath'] . 
'EVbackup.xml'), + + array('text' => RSS_IMPORT_CATEGORY, + 'type' => 'list', + 'name' => 'category', + 'value' => 0, + 'default' => $this->_getCategoryList()), + + array('text' => STATUS, + 'type' => 'list', + 'name' => 'type', + 'value' => 'publish', + 'default' => array('publish' => PUBLISH, 'draft' => DRAFT)), + + ); + } + + function _getCategoryList() { + $res = serendipity_fetchCategories('all'); + $ret = array(0 => NO_CATEGORY); + if (is_array($res)) { + foreach ($res as $v) { + $ret[$v['categoryid']] = $v['category_name']; + } + } + return $ret; + } + + function GetChildren(&$vals, &$i) { + $children = array(); + $cnt = sizeof($vals); + while (++$i < $cnt) { + // compare type + switch ($vals[$i]['type']) { + case 'cdata': + $children[] = $vals[$i]['value']; + break; + + case 'complete': + $children[] = array( + 'tag' => $vals[$i]['tag'], + 'attributes' => $vals[$i]['attributes'], + 'value' => $vals[$i]['value'] + ); + break; + + case 'open': + $children[] = array( + 'tag' => $vals[$i]['tag'], + 'attributes' => $vals[$i]['attributes'], + 'value' => $vals[$i]['value'], + 'children' => $this->GetChildren($vals, $i) + ); + break; + + case 'close': + return $children; + } + } + } + + function &parseXML(&$xml) { + // XML functions + $xml_string = ''; + if (preg_match('@(<\?xml.+\?>)@imsU', $xml, $xml_head)) { + $xml_string = $xml_head[1]; + } + + $encoding = 'UTF-8'; + if (preg_match('@encoding="([^"]+)"@', $xml_string, $xml_encoding)) { + $encoding = $xml_encoding[1]; + } + + preg_match_all('@(.*)@imsU', $xml, $xml_matches); + if (!is_array($xml_matches)) { + return false; + } + + $i = 0; + $tree = array(); + $tree[$i] = array( + 'tag' => 'entries', + 'attributes' => '', + 'value' => '', + 'children' => array() + ); + + foreach($xml_matches[0] as $xml_index => $xml_package) { + $i = 0; + + switch(strtolower($encoding)) { + case 'iso-8859-1': + case 'utf-8': + $p = xml_parser_create($encoding); + break; + + default: + $p = xml_parser_create(''); + } + + 
xml_parser_set_option($p, XML_OPTION_CASE_FOLDING, 0); + @xml_parser_set_option($p, XML_OPTION_TARGET_ENCODING, LANG_CHARSET); + $xml_package = $xml_string . "\n" . $xml_package; + xml_parse_into_struct($p, $xml_package, $vals); + xml_parser_free($p); + $tree[0]['children'][] = array( + 'tag' => $vals[$i]['tag'], + 'attributes' => $vals[$i]['attributes'], + 'value' => $vals[$i]['value'], + 'children' => $this->GetChildren($vals, $i) + ); + unset($vals); + } + + return $tree; + } + + function validateData() { + return sizeof($this->data); + } + + function getInputFields() { + return $this->inputFields; + } + + function getTimestamp($string) { + if (preg_match('@(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})@', $string, $match)) { + return mktime($match[4], $match[5], $match[6], $match[2], $match[3], $match[1]); + } else { + return time(); + } + } + + function import() { + global $serendipity; + + if (!file_exists($this->data['url'])) { + printf(FILE_NOT_FOUND, htmlspecialchars($this->data['url'])); + return false; + } + + $file = file_get_contents($this->data['url']); + $tree =& $this->parseXML($file); + $serendipity['noautodiscovery'] = 1; + + foreach($tree[0]['children'] AS $idx => $entry) { + if (!is_array($entry)) continue; + if ($entry['tag'] != 'entry') { + continue; + } + + $new_entry = array( + 'allow_comments' => true, + 'extended' => '', + 'categories' => array(), + 'isdraft' => ($this->data['type'] == 'draft' ? 
'true' : 'false'), + 'categories' => array($this->data['category'] => $this->data['category']) + ); + + if (!is_array($entry['children'])) continue; + + foreach($entry['children'] AS $idx2 => $entrydata) { + if (!is_array($entrydata)) { + continue; + } + + switch($entrydata['tag']) { + case 'eventtime': + $new_entry['timestamp'] = $this->getTimestamp($entrydata['value']); + break; + + case 'subject': + $new_entry['title'] = $entrydata['value']; + break; + + case 'event': + $new_entry['body'] = $entrydata['value']; + break; + } + } + $id = serendipity_updertEntry($new_entry); + echo 'Inserted entry #' . $id . ', "' . htmlspecialchars($new_entry['title']) . '"
' . "\n"; + + if (function_exists('ob_flush')) { + @ob_flush(); + } + if (function_exists('flush')) { + @flush(); + } + } + + return true; + } +} + +return 'Serendipity_Import_LiveJournalXML'; + +/* vim: set sts=4 ts=4 expandtab : */ diff --git a/include/admin/importers/nucleus.inc.php b/include/admin/importers/nucleus.inc.php index 9aa6ebe..331d076 100644 --- a/include/admin/importers/nucleus.inc.php +++ b/include/admin/importers/nucleus.inc.php @@ -154,6 +154,11 @@ class Serendipity_Import_Nucleus extends Serendipity_Import { for ($x=0, $max_x = mysql_num_rows($res) ; $x < $max_x ; $x++ ) { $entries[$x] = mysql_fetch_assoc($res); + + echo "BODY: " . $entries[$x]['ibody'] . "
\n"; + echo "DECODED BODY: " . $this->strtr($entries[$x]['ibody']) . "
\n"; + die('done'); + $entry = array('title' => $this->decode($entries[$x]['ititle']), 'isdraft' => ($entries[$x]['idraft'] != '1') ? 'false' : 'true', 'allow_comments' => ($entries[$x]['iclosed'] == '1' ) ? 'false' : 'true', diff --git a/include/admin/importers/voodoopad.inc.php b/include/admin/importers/voodoopad.inc.php old mode 100644 new mode 100755 index f52c970..c27e30b --- a/include/admin/importers/voodoopad.inc.php +++ b/include/admin/importers/voodoopad.inc.php @@ -38,6 +38,7 @@ class Serendipity_Import_VoodooPad extends Serendipity_Import { var $info = array('software' => 'VoodooPad'); var $data = array(); var $inputFields = array(); + var $force_recode = false; function Serendipity_Import_VoodooPad($data) { $this->data = $data;