PHP5 does not need a charset to be passed to xml_parser_create(), but PHP4 does.
PHP5 produced errors when UTF-8 feeds were double-recoded because of an xml_parser_set_option() call made without specifying the charset in xml_parser_create().
Hope this works. ;)
* private $type;
*/
- function ONYX_RSS()
+ function ONYX_RSS($charset = 'UTF-8')
{
- $this->__construct();
+ $this->__construct($charset);
}
// Forward compatibility with PHP v.5
// http://www.phpvolcano.com/eide/php5.php?page=start
- function __construct()
+ function __construct($charset = 'UTF-8')
{
$this->conf = array();
$this->conf['error'] = '<br /><strong>Error on line %s of '.__FILE__.'</strong>: %s<br />';
return false;
}
- $this->parser = @xml_parser_create();
+ if ($charset == 'native') {
+ $charset = LANG_CHARSET;
+ }
+ $this->parser = @xml_parser_create($charset);
if (!is_resource($this->parser))
{
$this->raiseError((__LINE__-3), ONYX_ERR_NO_PARSER);
Version 0.9 ()
------------------------------------------------------------------------
+ * Change the Onyx RSS parser and xml_parser_* function calls to specify
+ the source charset up front, so that the PHP functions can do the
+ recoding on their own. Functionality differed between PHP4 and PHP5;
+ this has now been unified. Thanks a lot to W-Mark Kubacki!
+
* Enhance XHTML Cleanup plugin to recode double-encoded UTF-8 HTML
entities on NON-UTF8 blogs. Patch thanks to W-Mark Kubacki!
}
function &decode($string) {
- static $phpCharset = null;
- $target = $this->data['charset'];
-
- if ($phpCharset === null) {
- $phpCharset = version_compare(phpversion(), '4.3.11', '>=');
- }
-
- if ($phpCharset == 1) {
- // Luckily PHP5 supports
- // xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, LANG_CHARSET);
- // which means we need no transcoding here.
+ // The xml_parser_* functions already handle the recoding for ISO-8859-1/UTF-8
+ if (LANG_CHARSET == 'ISO-8859-1' || LANG_CHARSET == 'UTF-8') {
return $string;
}
+ $target = $this->data['charset'];
+
switch($target) {
case 'native':
return $string;
function import() {
global $serendipity;
- $c = &new Onyx_RSS();
+ $c = &new Onyx_RSS($this->data['charset']);
$c->parse($this->data['url']);
$this->data['encoding'] = $c->rss['encoding'];
}
function decode(&$data) {
+ // The xml_parser_* functions already handle the recoding for ISO-8859-1/UTF-8
+ if (LANG_CHARSET == 'ISO-8859-1' || LANG_CHARSET == 'UTF-8') {
+ return true;
+ }
+
switch (strtolower(LANG_CHARSET)) {
case 'utf-8':
// The XML file is UTF-8 format. No changes needed.
}
// XML functions
- $xml_string = '<?xml version="1.0" encoding="ISO-8859-1" ?>';
+ $xml_string = '<?xml version="1.0" encoding="UTF-8" ?>';
if (preg_match('@(<\?xml.+\?>)@imsU', $xml, $xml_head)) {
$xml_string = $xml_head[1];
}
+
+ $encoding = 'UTF-8';
+ if (preg_match('@encoding="([^"]+)"@', $xml_string, $xml_encoding)) {
+ $encoding = $xml_encoding[1];
+ }
preg_match_all('@(<package version="[^"]+">.*</package>)@imsU', $xml, $xml_matches);
if (!is_array($xml_matches)) {
foreach($xml_matches[0] as $xml_index => $xml_package) {
$i = 0;
- $p = xml_parser_create();
+ $p = xml_parser_create($encoding);
xml_parser_set_option($p, XML_OPTION_CASE_FOLDING, 0);
+ @xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, LANG_CHARSET);
$xml_package = $xml_string . "\n" . $xml_package;
xml_parse_into_struct($p, $xml_package, $vals);
xml_parser_free($p);
$val = &$this->cleanup_val;
// Instead of nasty regex-mangling we use the XML parser to get the attribute list of our input tag
- $p = xml_parser_create();
+ $p = xml_parser_create(LANG_CHARSET);
@xml_parse_into_struct($p, $data[0], $vals, $index);
xml_parser_free($p);
$data = str_replace('</outline>', '', $data);
// XML functions
- $p = xml_parser_create();
+ $xml_string = '<?xml version="1.0" encoding="UTF-8" ?>';
+ if (preg_match('@(<\?xml.+\?>)@imsU', $data, $xml_head)) {
+ $xml_string = $xml_head[1];
+ }
+
+ $encoding = 'UTF-8';
+ if (preg_match('@encoding="([^"]+)"@', $xml_string, $xml_encoding)) {
+ $encoding = $xml_encoding[1];
+ }
+
+ $p = xml_parser_create($encoding);
// by: anony@mous.com - meets XML 1.0 specification
@xml_parser_set_option($p, XML_OPTION_CASE_FOLDING, 0);
xml_parser_set_option($p, XML_OPTION_TARGET_ENCODING, LANG_CHARSET);
$feedtype = $this->get_config('feedtype', 'rss');
$markup = $this->get_config('markup', 'false');
$bulletimg = $this->get_config('bulletimg');
+ $charset = $this->get_config('charset', 'native');
if (!$number || !is_numeric($number) || $number < 1) {
$showAll = true;
if ($feedtype == 'rss') {
require_once S9Y_PEAR_PATH . 'Onyx/RSS.php';
- $c = &new Onyx_RSS();
+ $c = &new Onyx_RSS($charset);
$c->parse($rssuri);
$this->encoding = $c->rss['encoding'];
}
function &decode($string) {
- static $phpCharset = null;
-
- if ($phpCharset === null) {
- $phpCharset = version_compare(phpversion(), '4.3.11', '>=');
- }
+ $target = $this->get_config('charset', 'native');
- if ($phpCharset == 1) {
- // Luckily PHP5 supports
- // xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, LANG_CHARSET);
- // which means we need no transcoding here.
+ // The xml_parser_* functions already handle the recoding for ISO-8859-1/UTF-8
+ if (LANG_CHARSET == 'ISO-8859-1' || LANG_CHARSET == 'UTF-8') {
return $string;
}
- $target = $this->get_config('charset', 'native');
-
switch($target) {
case 'native':
return $string;
*/
include(S9Y_INCLUDE_PATH . 'include/lang.inc.php');
+/*
+ * Reset charset definition now that final language is known
+ */
+$serendipity['charsets'] = array(
+ 'UTF-8/' => 'UTF-8',
+ '' => CHARSET_NATIVE
+);
+
/*
* Set current locale, if any has been defined
*/