From: dhawes Date: Sun, 23 Jan 2005 15:47:31 +0000 (+0000) Subject: Magpie 0.7.1 - all relevant files. Simplog had been using only one of these files... X-Git-Url: http://git.mjollnir.org/gw?a=commitdiff_plain;h=100ed1cf2bf2f1bd44f3b53e9004e83f0e868d61;p=moodle.git Magpie 0.7.1 - all relevant files. Simplog had been using only one of these files and it was renamed. Simplog had written an Atom feed parser based on magpie and was losing out on much of the magpie functionality because of it. This latest release supports Atom feeds so the need to for separate objects is gone. By using the full release upgrades become easier in the future. We also gain some of magpie's lost feature set such as the use of a modified Snoopy http client for retrieving feeds instead of an fopen() call - which means that php.ini does not need to be modified to turn allow_url_fopen to On. --- diff --git a/rss/magpie/AUTHORS b/rss/magpie/AUTHORS new file mode 100644 index 0000000000..7d7f3f53eb --- /dev/null +++ b/rss/magpie/AUTHORS @@ -0,0 +1 @@ +kellan diff --git a/rss/magpie/README b/rss/magpie/README new file mode 100644 index 0000000000..6af7edb07c --- /dev/null +++ b/rss/magpie/README @@ -0,0 +1,48 @@ +NAME + + MagpieRSS - a simple RSS integration tool + +SYNOPSIS + + require_once(rss_fetch.inc); + $url = $_GET['url']; + $rss = fetch_rss( $url ); + + echo "Channel Title: " . $rss->channel['title'] . "

"; + echo "

"; + +DESCRIPTION + + MapieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", + and simple to use. + + Some features include: + + * supports RSS 0.9 - 1.0, with limited RSS 2.0 support + * supports namespaces, and modules, including mod_content and mod_event + * open minded [1] + * simple, functional interface, to object oriented backend parser + * automatic caching of parsed RSS objects makes its easy to integrate + * supports conditional GET with Last-Modified, and ETag + * uses constants for easy override of default behaviour + * heavily commented + + +1. By open minded I mean Magpie will accept any tag it finds in good faith that + it was supposed to be here. For strict validation, look elsewhere. + + +GETTING STARTED + + + +COPYRIGHT: + Copyright(c) 2002 kellan@protest.net. All rights reserved. + This software is released under the GNU General Public License. + Please read the disclaimer at the top of the Snoopy.class.inc file. diff --git a/rss/magpie/README_MOODLE.txt b/rss/magpie/README_MOODLE.txt new file mode 100644 index 0000000000..3bc9d8c900 --- /dev/null +++ b/rss/magpie/README_MOODLE.txt @@ -0,0 +1,3 @@ +This folder is the MagpieRSS news feed client library +http://magpierss.sourceforge.net/ +Moodle's rss_client block uses these libraries to download, parse and cache remote new feeds. diff --git a/rss/magpie/TROUBLESHOOTING b/rss/magpie/TROUBLESHOOTING new file mode 100644 index 0000000000..89068d382c --- /dev/null +++ b/rss/magpie/TROUBLESHOOTING @@ -0,0 +1,152 @@ +TROUBLESHOOTING + + +Trouble Installing MagpieRSS: + +1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' + (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') + +2. Cache couldn't make dir './cache'. + +3. Fatal error: Failed to load PHP's XML Extension. + http://www.php.net/manual/en/ref.xml.php + +Trouble Using MagpieRSS + +4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. + (HTTP Error: Invalid protocol "") + +5. 
Warning: MagpieRSS: Failed to parse RSS file. + (not well-formed (invalid token) at line 19, column 98) + +6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. + (HTTP Response: HTTP/1.1 404 Not Found) + +If you would rather provide a custom error, see the COOKBOOK +(http://magpierss.sf.net/cookbook.html) recipe 2. + +************************************************************************* +1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' + (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') + + This could mean that: + + a) PHP can't find the MagpieRSS files. + b) PHP found the MagpieRSS files, but can't read them. + + a. Telling PHP where to look for MagpieRSS files. + + This might mean your PHP program can't find the MagpieRSS libraries. + Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, + rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the + cookbook for exceptions). + + This can be fixed by making sure the MagpieRSS files are in your include + path. + + If you can't edit your include path (for example you're on a shared host) then + you need to replace: + + require_once('rss_fetch.inc'); + + -with- + + define('MAGPIE_DIR', '/path/to/magpierss/'); + require_once(MAGPIE_DIR.'rss_fetch.inc'); + + b. PHP can't read the MagpieRSS files + + All PHP libraries need to be readable by your webserver. + + On Unix you can accomplish this with: + + chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc + +************************************************************************* +2. Cache couldn't make dir './cache'. + + MagpieRSS caches the results of fetched and parsed RSS to reduce the load on + both your server, and the remote server providing the RSS. It does this by + writing files to a cache directory. + + This error means the webserver doesn't have write access to the current + directory. + + a. 
Make a webserver writeable cache directory + + Find the webserver's group. (on my system it is 'www') + + mkdir ./cache + chgrp www directory_name + chmod g+w directory_name + + (this is the best, and desired solution) + + b. Tell MagpieRSS to create the cache directory somewhere the webserver can + write to. + + define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); + + (this is not a great solution, and might have security considerations) + + c. Turn off cacheing. + + Magpie can work fine without cacheing, but it will be slower, and you might + become a nuisance to the RSS provider, but it is an option. + + define('MAGPIE_CACHE_ON', 0); + + d. And lastly, do NOT + + chmod 777 ./cache + + Any of the above solutions are better than this. + + NOTE: If none of this works for you, let me know. I've got root, and a + custom compiled Apache on almost any box I ever touch, so I can be a little + out of touch with reality. But I won't know that if I don't get feedback. + +************************************************************************* 3. +3. Fatal error: Failed to load PHP's XML Extension. + http://www.php.net/manual/en/ref.xml.php + + -or- + + Fatal error: Failed to create an instance of PHP's XML parser. + http://www.php.net/manual/en/ref.xml.php + + Make sure your PHP was built with --with-xml + + This has been turned on by default for several versions of PHP, but it might + be turned off in your build. + + See php.net for details on building and configuring PHP. + + +************************************************************************* +4. Warning: MagpieRSS: Failed to fetch index.rdf. + (HTTP Error: Invalid protocol "") + + You need to put http:// in front of the URL to your RSS feed + +************************************************************************* +5. Warning: MagpieRSS: Failed to parse RSS file. + (not well-formed (invalid token) at line 19, column 98) + + There is a problem with the RSS feed you are trying to read. 
+ MagpieRSS is an XML parser, and therefore can't parse RSS feed with invalid + characters. Some RSS parser are based on regular expressions, and can + parse invalid RSS but they have their own problems. + + You could try contacting the author of the RSS feed, and pointing them to + the online RSS validator at: + + http://feeds.archive.org/validator/ + +************************************************************************* +6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf + (HTTP Response: HTTP/1.1 404 Not Found) + + Its a 404! The RSS file ain't there. + + diff --git a/rss/magpie/cookbook b/rss/magpie/cookbook new file mode 100644 index 0000000000..45dda98bcd --- /dev/null +++ b/rss/magpie/cookbook @@ -0,0 +1,125 @@ +MAGPIERSS RECIPES: Cooking with Corbies + + "Four and twenty blackbirds baked in a pie." + +1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. + +PROBLEM: + +You want to display the 10 (or 3) most recent headlines, but the RSS feed +contains 15. + +SOLUTION: + +$num_items = 10; +$rss = fetch_rss($url); + +$items = array_slice($rss->items, 0, $num_items); + +DISCUSSION: + +Rather then trying to limit the number of items Magpie parses, a much simpler, +and more flexible approach is to take a "slice" of the array of items. And +array_slice() is smart enough to do the right thing if the feed has less items +then $num_items. + +See: http://www.php.net/array_slice + + +2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG + +PROBLEM: + +You don't want Magpie's error messages showing up if something goes wrong. + +SOLUTION: + +# Magpie throws USER_WARNINGS only +# so you can cloak these, by only showing ERRORs +error_reporting(E_ERROR); + +# check the return value of fetch_rss() + +$rss = fetch_rss($url); + +if ( $rss ) { +...display rss feed... +} +else { + echo "An error occured! " . + "Consider donating more $$$ for restoration of services." . + "
Error Message: " . magpie_error(); +} + +DISCUSSION: + +MagpieRSS triggers a warning in a number of circumstances. The 2 most common +circumstances are: if the specified RSS file isn't properly formed (usually +because it includes illegal HTML), or if Magpie can't download the remote RSS +file, and there is no cached version. + +If you don't want your users to see these warnings change your error_reporting +settings to only display ERRORs. Another option is to turn off display_error, +so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. + +You can do this with: + +ini_set('display_errors', 0); + +See: http://www.php.net/error_reporting, + http://www.php.net/ini_set, + http://www.php.net/manual/en/ref.errorfunc.php + +3. GENERATE A NEW RSS FEED + +PROBLEM: + +Create an RSS feed for other people to use. + +SOLUTION: + +Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) + +DISCUSSION: + +An example of turning a Magpie parsed RSS object back into an RSS file is forth +coming. In the meantime RSSWriter has great documentation. + +4. DISPLAY HEADLINES MORE RECENT THEN X DATE + +PROBLEM: + +You only want to display headlines that were published on, or after a certain +date. + + +SOLUTION: + +require 'rss_utils.inc'; + +# get all headlines published today +$today = getdate(); + +# today, 12AM +$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); + +$rss = fetch_rss($url); + +foreach ( $rss->items as $item ) { + $published = parse_w3cdtf($item['dc']['date']); + if ( $published >= $date ) { + echo "Title: " . $item['title']; + echo "Published: " . date("h:i:s A", $published); + echo "

"; + } +} + +DISCUSSION: + +This recipe only works for RSS 1.0 feeds that include the field. +(which is very good RSS style) + +parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix +epoch seconds. + +See: http://www.php.net/manual/en/ref.datetime.php diff --git a/rss/magpie/rss_cache.inc b/rss/magpie/rss_cache.inc new file mode 100644 index 0000000000..fdfa5dd259 --- /dev/null +++ b/rss/magpie/rss_cache.inc @@ -0,0 +1,184 @@ + + * Version: 0.51 + * License: GPL + * + * The lastest version of MagpieRSS can be obtained from: + * http://magpierss.sourceforge.net + * + * For questions, help, comments, discussion, etc., please join the + * Magpie mailing list: + * http://lists.sourceforge.net/lists/listinfo/magpierss-general + * + */ + +class RSSCache { + var $BASE_CACHE = './cache'; // where the cache files are stored + var $MAX_AGE = 3600; // when are files stale, default one hour + var $ERROR = ""; // accumulate error messages + + function RSSCache ($base='', $age='') { + if ( $base ) { + $this->BASE_CACHE = $base; + } + if ( $age ) { + $this->MAX_AGE = $age; + } + + // attempt to make the cache directory + if ( ! file_exists( $this->BASE_CACHE ) ) { + $status = @mkdir( $this->BASE_CACHE, 0755 ); + + // if make failed + if ( ! $status ) { + $this->error( + "Cache couldn't make dir '" . $this->BASE_CACHE . "'." + ); + } + } + } + +/*=======================================================================*\ + Function: set + Purpose: add an item to the cache, keyed on url + Input: url from wich the rss file was fetched + Output: true on sucess +\*=======================================================================*/ + function set ($url, $rss) { + $this->ERROR = ""; + $cache_file = $this->file_name( $url ); + $fp = @fopen( $cache_file, 'w' ); + + if ( ! 
$fp ) { + $this->error( + "Cache unable to open file for writing: $cache_file" + ); + return 0; + } + + + $data = $this->serialize( $rss ); + fwrite( $fp, $data ); + fclose( $fp ); + + return $cache_file; + } + +/*=======================================================================*\ + Function: get + Purpose: fetch an item from the cache + Input: url from wich the rss file was fetched + Output: cached object on HIT, false on MISS +\*=======================================================================*/ + function get ($url) { + $this->ERROR = ""; + $cache_file = $this->file_name( $url ); + + if ( ! file_exists( $cache_file ) ) { + $this->debug( + "Cache doesn't contain: $url (cache file: $cache_file)" + ); + return 0; + } + + $fp = @fopen($cache_file, 'r'); + if ( ! $fp ) { + $this->error( + "Failed to open cache file for reading: $cache_file" + ); + return 0; + } + + $data = fread( $fp, filesize($cache_file) ); + $rss = $this->unserialize( $data ); + + return $rss; + } + +/*=======================================================================*\ + Function: check_cache + Purpose: check a url for membership in the cache + and whether the object is older then MAX_AGE (ie. 
STALE) + Input: url from wich the rss file was fetched + Output: cached object on HIT, false on MISS +\*=======================================================================*/ + function check_cache ( $url ) { + $this->ERROR = ""; + $filename = $this->file_name( $url ); + + if ( file_exists( $filename ) ) { + // find how long ago the file was added to the cache + // and whether that is longer then MAX_AGE + $mtime = filemtime( $filename ); + $age = time() - $mtime; + if ( $this->MAX_AGE > $age ) { + // object exists and is current + return 'HIT'; + } + else { + // object exists but is old + return 'STALE'; + } + } + else { + // object does not exist + return 'MISS'; + } + } + +/*=======================================================================*\ + Function: serialize +\*=======================================================================*/ + function serialize ( $rss ) { + return serialize( $rss ); + } + +/*=======================================================================*\ + Function: unserialize +\*=======================================================================*/ + function unserialize ( $data ) { + return unserialize( $data ); + } + +/*=======================================================================*\ + Function: file_name + Purpose: map url to location in cache + Input: url from wich the rss file was fetched + Output: a file name +\*=======================================================================*/ + function file_name ($url) { + $filename = md5( $url ); + return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) ); + } + +/*=======================================================================*\ + Function: error + Purpose: register error +\*=======================================================================*/ + function error ($errormsg, $lvl=E_USER_WARNING) { + // append PHP's error message if track_errors enabled + if ( isset($php_errormsg) ) { + $errormsg .= " ($php_errormsg)"; + } + $this->ERROR = 
$errormsg; + if ( MAGPIE_DEBUG ) { + trigger_error( $errormsg, $lvl); + } + else { + error_log( $errormsg, 0); + } + } + + function debug ($debugmsg, $lvl=E_USER_NOTICE) { + if ( MAGPIE_DEBUG ) { + $this->error("MagpieRSS [debug] $debugmsg", $lvl); + } + } + +} + +?> diff --git a/rss/magpie/rss_fetch.inc b/rss/magpie/rss_fetch.inc new file mode 100644 index 0000000000..c41945cc73 --- /dev/null +++ b/rss/magpie/rss_fetch.inc @@ -0,0 +1,459 @@ + + * License: GPL + * + * The lastest version of MagpieRSS can be obtained from: + * http://magpierss.sourceforge.net + * + * For questions, help, comments, discussion, etc., please join the + * Magpie mailing list: + * magpierss-general@lists.sourceforge.net + * + */ + +// Setup MAGPIE_DIR for use on hosts that don't include +// the current path in include_path. +// with thanks to rajiv and smarty +if (!defined('DIR_SEP')) { + define('DIR_SEP', DIRECTORY_SEPARATOR); +} + +if (!defined('MAGPIE_DIR')) { + define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP); +} + +require_once( MAGPIE_DIR . 'rss_parse.inc' ); +require_once( MAGPIE_DIR . 'rss_cache.inc' ); + +// for including 3rd party libraries +define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); +require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); + + +/* + * CONSTANTS - redefine these in your script to change the + * behaviour of fetch_rss() currently, most options effect the cache + * + * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? + * For me a built in cache was essential to creating a "PHP-like" + * feel to Magpie, see rss_cache.inc for rationale + * + * + * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? + * This should be a location that the webserver can write to. If this + * directory does not already exist Mapie will try to be smart and create + * it. This will often fail for permissions reasons. + * + * + * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. 
+ * + * + * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error + * instead of returning stale object? + * + * MAGPIE_DEBUG - Display debugging notices? + * +*/ + + +/*=======================================================================*\ + Function: fetch_rss: + Purpose: return RSS object for the give url + maintain the cache + Input: url of RSS file + Output: parsed RSS object (see rss_parse.inc) + + NOTES ON CACHEING: + If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. + + NOTES ON RETRIEVING REMOTE FILES: + If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will + return a cached object, and touch the cache object upon recieving a + 304. + + NOTES ON FAILED REQUESTS: + If there is an HTTP error while fetching an RSS object, the cached + version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) +\*=======================================================================*/ + +define('MAGPIE_VERSION', '0.7'); + +$MAGPIE_ERROR = ""; + +function fetch_rss ($url) { + // initialize constants + init(); + + if ( !isset($url) ) { + do_error("fetch_rss called without a url"); + return false; + } + + // if cache is disabled + if ( !MAGPIE_CACHE_ON ) { + // fetch file, and parse it + $resp = _fetch_remote_file( $url ); + if ( is_success( $resp->status ) ) { + return _response_to_rss( $resp ); + } + else { + do_error("Failed to fetch $url and cache is off"); + return false; + } + } + // else cache is ON + else { + // Flow + // 1. check cache + // 2. if there is a hit, make sure its fresh + // 3. if cached obj fails freshness check, fetch remote + // 4. 
if remote fails, return stale object, or error + + $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); + + if (MAGPIE_DEBUG and $cache->ERROR) { + do_debug($cache->ERROR, E_USER_WARNING); + } + + + $cache_status = 0; // response of check_cache + $request_headers = array(); // HTTP headers to send with fetch + $rss = 0; // parsed RSS object + $errormsg = 0; // errors, if any + + // store parsed XML by desired output encoding + // as character munging happens at parse time + $cache_key = $url . MAGPIE_OUTPUT_ENCODING; + + if (!$cache->ERROR) { + // return cache HIT, MISS, or STALE + $cache_status = $cache->check_cache( $cache_key); + } + + // if object cached, and cache is fresh, return cached obj + if ( $cache_status == 'HIT' ) { + $rss = $cache->get( $cache_key ); + if ( isset($rss) and $rss ) { + $rss->from_cache = 1; + if ( MAGPIE_DEBUG > 1) { + do_debug("MagpieRSS: Cache HIT", E_USER_NOTICE); + } + return $rss; + } + } + + // else attempt a conditional get + + // setup headers + if ( $cache_status == 'STALE' ) { + $rss = $cache->get( $url ); + if ( $rss->etag and $rss->last_modified ) { + $request_headers['If-None-Match'] = $rss->etag; + $request_headers['If-Last-Modified'] = $rss->last_modified; + } + } + + $resp = _fetch_remote_file( $url, $request_headers ); + + if (isset($resp) and $resp) { + if ($resp->status == '304' ) { + // we have the most current copy + if ( MAGPIE_DEBUG > 1) { + do_debug("Got 304 for $url"); + } + // reset cache on 304 (at minutillo insistent prodding) + $cache->set($cache_key, $rss); + return $rss; + } + elseif ( is_success( $resp->status ) ) { + $rss = _response_to_rss( $resp ); + if ( $rss ) { + if (MAGPIE_DEBUG > 1) { + do_debug("Fetch successful"); + } + // add object to cache + $cache->set( $cache_key, $rss ); + return $rss; + } + } + else { + $errormsg = "Failed to fetch $url. "; + if ( $resp->status == '-100' ) { + $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . 
" seconds)"; + } + elseif ( $resp->error ) { + # compensate for Snoopy's annoying habbit to tacking + # on '\n' + $http_error = substr($resp->error, 0, -2); + $errormsg .= "(HTTP Error: $http_error)"; + } + else { + $errormsg .= "(HTTP Response: " . $resp->response_code .')'; + } + } + } + else { + $errormsg = "Unable to retrieve RSS file for unknown reasons."; + } + + // else fetch failed + + // attempt to return cached object + if ($rss) { + if ( MAGPIE_DEBUG ) { + do_debug("Returning STALE object for $url"); + } + return $rss; + } + + // else we totally failed + do_error( $errormsg ); + + return false; + + } // end if ( !MAGPIE_CACHE_ON ) { +} // end fetch_rss() + +/*=======================================================================*\ + Function: error + Purpose: set MAGPIE_ERROR, and trigger error +\*=======================================================================*/ +//Daryl Hawes note: had to rename this function from error to do_error for moodle +// due to conflict with existing moodle function name +function do_error ($errormsg, $lvl=E_USER_WARNING) { + global $MAGPIE_ERROR; + + // append PHP's error message if track_errors enabled + if ( isset($php_errormsg) ) { + $errormsg .= " ($php_errormsg)"; + } + if ( $errormsg ) { + $errormsg = "MagpieRSS: $errormsg"; + $MAGPIE_ERROR = $errormsg; + trigger_error( $errormsg, $lvl); + } +} + +//Daryl Hawes note: renamed this function from debug to do_debug for moodle +function do_debug ($debugmsg, $lvl=E_USER_NOTICE) { + trigger_error("MagpieRSS [debug] $debugmsg", $lvl); +} + +/*=======================================================================*\ + Function: magpie_error + Purpose: accessor for the magpie error variable +\*=======================================================================*/ +function magpie_error ($errormsg="") { + global $MAGPIE_ERROR; + + if ( isset($errormsg) and $errormsg ) { + $MAGPIE_ERROR = $errormsg; + } + + return $MAGPIE_ERROR; +} + 
+/*=======================================================================*\ + Function: _fetch_remote_file + Purpose: retrieve an arbitrary remote file + Input: url of the remote file + headers to send along with the request (optional) + Output: an HTTP response object (see Snoopy.class.inc) +\*=======================================================================*/ +function _fetch_remote_file ($url, $headers = "" ) { + // Snoopy is an HTTP client in PHP + $client = new Snoopy(); + $client->agent = MAGPIE_USER_AGENT; + $client->read_timeout = MAGPIE_FETCH_TIME_OUT; + $client->use_gzip = MAGPIE_USE_GZIP; + if (is_array($headers) ) { + $client->rawheaders = $headers; + } + + @$client->fetch($url); + return $client; + +} + +/*=======================================================================*\ + Function: _response_to_rss + Purpose: parse an HTTP response object into an RSS object + Input: an HTTP response object (see Snoopy) + Output: parsed RSS object (see rss_parse) +\*=======================================================================*/ +function _response_to_rss ($resp) { + $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); + + // if RSS parsed successfully + if ( $rss and !$rss->ERROR) { + + // find Etag, and Last-Modified + foreach($resp->headers as $h) { + // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" + if (strpos($h, ": ")) { + list($field, $val) = explode(": ", $h, 2); + } + else { + $field = $h; + $val = ""; + } + + if ( $field == 'ETag' ) { + $rss->etag = $val; + } + + if ( $field == 'Last-Modified' ) { + $rss->last_modified = $val; + } + } + + return $rss; + } // else construct error message + else { + $errormsg = "Failed to parse RSS file."; + + if ($rss) { + $errormsg .= " (" . $rss->ERROR . 
")"; + } + do_error($errormsg); + + return false; + } // end if ($rss and !$rss->error) +} + +/*=======================================================================*\ + Function: init + Purpose: setup constants with default values + check for user overrides +\*=======================================================================*/ +function init () { + if ( defined('MAGPIE_INITALIZED') ) { + return; + } + else { + define('MAGPIE_INITALIZED', true); + } + + if ( !defined('MAGPIE_CACHE_ON') ) { + define('MAGPIE_CACHE_ON', true); + } + + if ( !defined('MAGPIE_CACHE_DIR') ) { + define('MAGPIE_CACHE_DIR', './cache'); + } + + if ( !defined('MAGPIE_CACHE_AGE') ) { + define('MAGPIE_CACHE_AGE', 60*60); // one hour + } + + if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) { + define('MAGPIE_CACHE_FRESH_ONLY', false); + } + + if ( !defined('MAGPIE_OUTPUT_ENCODING') ) { + define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1'); + } + + if ( !defined('MAGPIE_INPUT_ENCODING') ) { + define('MAGPIE_INPUT_ENCODING', null); + } + + if ( !defined('MAGPIE_DETECT_ENCODING') ) { + define('MAGPIE_DETECT_ENCODING', true); + } + + if ( !defined('MAGPIE_DEBUG') ) { + define('MAGPIE_DEBUG', 0); + } + + if ( !defined('MAGPIE_USER_AGENT') ) { + $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net'; + + if ( MAGPIE_CACHE_ON ) { + $ua = $ua . ')'; + } + else { + $ua = $ua . '; No cache)'; + } + + define('MAGPIE_USER_AGENT', $ua); + } + + if ( !defined('MAGPIE_FETCH_TIME_OUT') ) { + define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout + } + + // use gzip encoding to fetch rss files if supported? + if ( !defined('MAGPIE_USE_GZIP') ) { + define('MAGPIE_USE_GZIP', true); + } +} + +// NOTE: the following code should really be in Snoopy, or at least +// somewhere other then rss_fetch! + +/*=======================================================================*\ + HTTP STATUS CODE PREDICATES + These functions attempt to classify an HTTP status code + based on RFC 2616 and RFC 2518. 
+ + All of them take an HTTP status code as input, and return true or false + + All this code is adapted from LWP's HTTP::Status. +\*=======================================================================*/ + + +/*=======================================================================*\ + Function: is_info + Purpose: return true if Informational status code +\*=======================================================================*/ +function is_info ($sc) { + return $sc >= 100 && $sc < 200; +} + +/*=======================================================================*\ + Function: is_success + Purpose: return true if Successful status code +\*=======================================================================*/ +function is_success ($sc) { + return $sc >= 200 && $sc < 300; +} + +/*=======================================================================*\ + Function: is_redirect + Purpose: return true if Redirection status code +\*=======================================================================*/ +function is_redirect ($sc) { + return $sc >= 300 && $sc < 400; +} + +/*=======================================================================*\ + Function: is_error + Purpose: return true if Error status code +\*=======================================================================*/ +function is_error ($sc) { + return $sc >= 400 && $sc < 600; +} + +/*=======================================================================*\ + Function: is_client_error + Purpose: return true if Error status code, and its a client error +\*=======================================================================*/ +function is_client_error ($sc) { + return $sc >= 400 && $sc < 500; +} + +/*=======================================================================*\ + Function: is_client_error + Purpose: return true if Error status code, and its a server error +\*=======================================================================*/ +function is_server_error ($sc) { + return $sc 
>= 500 && $sc < 600; +} + +?> diff --git a/rss/magpie/rss_parse.inc b/rss/magpie/rss_parse.inc new file mode 100644 index 0000000000..af4020ebfb --- /dev/null +++ b/rss/magpie/rss_parse.inc @@ -0,0 +1,589 @@
<?php

/**
* rss_parse.inc - MagpieRSS feed parser: takes an RSS or Atom document as a
* string and exposes it as a simple object (see class MagpieRSS below).
*
* @version 0.7a
* @license GPL
*
*/

define('RSS', 'RSS');
define('ATOM', 'Atom');

require_once (MAGPIE_DIR . 'rss_utils.inc');

/**
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
*
* see: rss_fetch.inc for a simpler interface with integrated caching support
*
*/
class MagpieRSS {
    var $parser;

    var $current_item   = array();  // item currently being parsed
    var $items          = array();  // collection of parsed items
    var $channel        = array();  // hash of channel fields
    var $textinput      = array();
    var $image          = array();
    var $feed_type;
    var $feed_version;
    var $encoding       = '';       // output encoding of parsed rss

    var $_source_encoding = '';     // only set if we have to parse xml prolog

    var $ERROR = "";
    var $WARNING = "";

    // define some constants

    var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
    var $_KNOWN_ENCODINGS    = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

    // parser variables, useless if you're not a parser, treat as private
    var $stack              = array(); // parser stack
    var $inchannel          = false;
    var $initem             = false;
    var $incontent          = false;   // if in Atom <content mode="xml"> field
    var $intextinput        = false;
    var $inimage            = false;
    var $current_field      = '';
    var $current_namespace  = false;


    /**
     * Set up XML parser, parse source, and populate this object.
     *
     * On a setup failure (XML extension missing, parser could not be
     * created) the problem is reported through error() and the object is
     * left unpopulated; a parse failure is reported through error() and
     * whatever was parsed so far is still normalized.
     *
     * @param string $source           string containing the RSS to be parsed
     *
     * NOTE:  Probably a good idea to leave the encoding options alone unless
     *        you know what you're doing as PHP's character set support is
     *        a little weird.
     *
     * NOTE:  A lot of this is unnecessary but harmless with PHP5
     *
     * @param string $output_encoding  output the parsed RSS in this character
     *                                 set; defaults to ISO-8859-1 as this is
     *                                 PHP's default.
     *
     *                                 NOTE: might be changed to UTF-8 in
     *                                 future versions.
     *
     * @param string $input_encoding   the character set of the incoming RSS
     *                                 source. Leave blank and Magpie will try
     *                                 to figure it out.
     *
     * @param bool   $detect_encoding  if false Magpie won't attempt to detect
     *                                 source encoding. (caveat emptor)
     */
    function __construct ($source, $output_encoding='ISO-8859-1',
                          $input_encoding=null, $detect_encoding=true)
    {
        # if PHP xml isn't compiled in, die
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            // error() only halts when it ends up in trigger_error(); never
            // fall through into calls to functions that do not exist
            return;
        }

        list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

        // the parser handle is a resource on older PHP and an object on
        // newer PHP; accept either
        if (!is_resource($parser) and !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );

        xml_set_character_data_handler( $this->parser, 'feed_cdata' );

        $status = xml_parse( $this->parser, $source );

        if (! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error  = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col  = xml_get_current_column_number($this->parser);
                $errormsg   = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );

        $this->normalize();
    }

    /**
     * Class-named constructor, kept so `new MagpieRSS(...)` keeps working on
     * PHP versions that only recognise this form. Same arguments as
     * __construct(), to which it delegates.
     */
    function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                        $input_encoding=null, $detect_encoding=true)
    {
        $this->__construct($source, $output_encoding,
                           $input_encoding, $detect_encoding);
    }

    /**
     * XML start-element handler.
     *
     * Identifies the feed type from the root element, tracks which
     * container (channel, item/entry, textinput, image, Atom content
     * construct) we are in, and pushes ordinary field elements on the stack.
     *
     * @param mixed  $p        the XML parser (unused)
     * @param string $element  element name, possibly with a "ns:" prefix
     * @param array  $attrs    element attributes
     */
    function feed_start_element($p, $element, &$attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        # if feed type isn't set, then this is first element of feed
        # identify feed from root element
        #
        if (!isset($this->feed_type) ) {
            // the version attribute is optional; don't index it blindly
            $version = isset($attrs['version']) ? $attrs['version'] : null;

            if ( $el == 'rdf' ) {
                $this->feed_type = RSS;
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->feed_type = RSS;
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->feed_type = ATOM;
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ( $el == 'channel' )
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' )
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        }

        // if we're in the default namespace of an RSS feed,
        //  record textinput or image fields
        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'textinput' )
        {
            $this->intextinput = true;
        }

        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'image' )
        {
            $this->inimage = true;
        }

        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        }

        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->feed_type == ATOM and $this->incontent )
        {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                    array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) ) );

            $this->append_content( "<$element $attrs_str>" );

            array_unshift( $this->stack, $el );
        }

        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        //
        elseif ($this->feed_type == ATOM and $el == 'link' )
        {
            $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
            $href = isset($attrs['href']) ? $attrs['href'] : '';

            if ( $rel == 'alternate' ) {
                $link_el = 'link';
            }
            else {
                $link_el = 'link_' . $rel;
            }

            $this->append($link_el, $href);
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }


    /**
     * XML character-data handler: routes text either into the open Atom
     * content construct or into the field named by the current stack.
     *
     * @param mixed  $p     the XML parser (unused)
     * @param string $text  character data chunk
     */
    function feed_cdata ($p, $text) {

        if ($this->feed_type == ATOM and $this->incontent)
        {
            $this->append_content( $text );
        }
        else {
            // nested elements are flattened to "outer_inner" field names
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    /**
     * XML end-element handler: closes whichever container or field the
     * element opened and resets the current namespace.
     *
     * @param mixed  $p   the XML parser (unused)
     * @param string $el  element name
     */
    function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'item' or $el == 'entry' )
        {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = false;
        }
        elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' )
        {
            $this->inchannel = false;
        }
        elseif ($this->feed_type == ATOM and $this->incontent ) {
            // balance tags properly
            // note:  i don't think this is actually necessary
            if ( isset($this->stack[0]) and $this->stack[0] == $el )
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }

            array_shift( $this->stack );
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

    /**
     * Append $str2 to $str1 in place, initialising $str1 to "" when unset.
     *
     * @param string $str1  target, passed by reference
     * @param string $str2  text to append
     */
    function concat (&$str1, $str2="") {
        if (!isset($str1) ) {
            $str1="";
        }
        $str1 .= $str2;
    }


    /**
     * Append text to the open Atom content construct of the current item,
     * or of the channel when not inside an item.
     *
     * @param string $text  text or flattened markup to append
     */
    function append_content($text) {
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $this->incontent ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $this->incontent ], $text );
        }
    }

    /**
     * smart append - field and namespace aware
     *
     * Namespaced fields are stored as $container[namespace][field], others
     * as $container[field]. Does nothing when $el is empty.
     *
     * @param string $el    field name
     * @param string $text  text to append to that field
     */
    function append($el, $text) {
        if (!$el) {
            return;
        }
        if ( $this->current_namespace )
        {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $this->current_namespace ][ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $this->current_namespace ][ $el ], $text );
            }
        }
        else {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $el ], $text );
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $el ], $text );
            }

        }
    }

    /**
     * Cross-populate RSS and Atom field names so callers can use either
     * vocabulary, and add a 'date_timestamp' to items with a parsable date.
     */
    function normalize () {
        // if atom populate rss fields
        if ( $this->is_atom() ) {
            $this->channel['description'] =
                isset($this->channel['tagline']) ? $this->channel['tagline'] : null;
            for ( $i = 0; $i < count($this->items); $i++) {
                $item = $this->items[$i];
                if ( isset($item['summary']) )
                    $item['description'] = $item['summary'];
                if ( isset($item['atom_content']))
                    $item['content']['encoded'] = $item['atom_content'];

                // prefer the issued date, fall back to modified
                $atom_date = null;
                if ( isset($item['issued']) ) {
                    $atom_date = $item['issued'];
                }
                elseif ( isset($item['modified']) ) {
                    $atom_date = $item['modified'];
                }
                if ( $atom_date ) {
                    // parse the date that was actually selected above
                    $epoch = @parse_w3cdtf($atom_date);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
        elseif ( $this->is_rss() ) {
            $this->channel['tagline'] =
                isset($this->channel['description']) ? $this->channel['description'] : null;
            for ( $i = 0; $i < count($this->items); $i++) {
                $item = $this->items[$i];
                if ( isset($item['description']))
                    $item['summary'] = $item['description'];
                if ( isset($item['content']['encoded'] ) )
                    $item['atom_content'] = $item['content']['encoded'];

                if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                    $epoch = @parse_w3cdtf($item['dc']['date']);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                elseif ( isset($item['pubdate']) ) {
                    $epoch = @strtotime($item['pubdate']);
                    if ($epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
    }


    /**
     * @return mixed  the feed version if this is an RSS feed, else false
     */
    function is_rss () {
        if ( $this->feed_type == RSS ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * @return mixed  the feed version if this is an Atom feed, else false
     */
    function is_atom() {
        if ( $this->feed_type == ATOM ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
    * return XML parser, and possibly re-encoded source
    *
    * @param string $source   the feed document
    * @param string $out_enc  target encoding for parsed data ('' to skip)
    * @param string $in_enc   source encoding, if known
    * @param bool   $detect   whether to try to detect the source encoding
    * @return array           array($parser, $source)
    */
    function create_parser($source, $out_enc, $in_enc, $detect) {
        // compare versions properly: a first-character test misroutes
        // every PHP release after 5 into the PHP4 code path
        if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
            $parser = $this->php5_create_parser($in_enc, $detect);
        }
        else {
            list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
        }
        if ($out_enc) {
            $this->encoding = $out_enc;
            if ($parser) {
                xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
            }
        }

        return array($parser, $source);
    }

    /**
    * Instantiate an XML parser under PHP5
    *
    * PHP5 will do a fine job of detecting input encoding
    * if passed an empty string as the encoding.
    *
    * All hail libxml2!
    *
    * @param string $in_enc  source encoding, used only when $detect is false
    * @param bool   $detect  whether the parser should detect the encoding
    */
    function php5_create_parser($in_enc, $detect) {
        // by default php5 does a fine job of detecting input encodings
        if(!$detect && $in_enc) {
            return xml_parser_create($in_enc);
        }
        else {
            return xml_parser_create('');
        }
    }

    /**
    * Instantiate an XML parser under PHP4
    *
    * Unfortunately PHP4's support for character encodings
    * and especially XML and character encodings sucks.  As
    * long as the documents you parse only contain characters
    * from the ISO-8859-1 character set (a superset of ASCII,
    * and a subset of UTF-8) you're fine.  However once you
    * step out of that comfy little world things get mad, bad,
    * and dangerous to know.
    *
    * The following code is based on SJM's work with FoF
    * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
    *
    * @param string $source  the feed document
    * @param string $in_enc  source encoding, if known
    * @param bool   $detect  whether to try to detect the source encoding
    * @return array          array($parser, $source), $source possibly
    *                        re-encoded to UTF-8
    */
    function php4_create_parser($source, $in_enc, $detect) {
        if ( !$detect ) {
            return array(xml_parser_create($in_enc), $source);
        }

        if (!$in_enc) {
            // pull the declared encoding out of the XML prolog, if any
            if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*\?>/m', $source, $m)) {
                $in_enc = strtoupper($m[1]);
                $this->_source_encoding = $in_enc;
            }
            else {
                $in_enc = 'UTF-8';
            }
        }

        if ($this->known_encoding($in_enc)) {
            return array(xml_parser_create($in_enc), $source);
        }

        // the detected encoding is not one of the simple encodings PHP knows

        // attempt to use the iconv extension to
        // cast the XML to a known encoding
        // @see http://php.net/iconv

        if (function_exists('iconv'))  {
            $encoded_source = iconv($in_enc,'UTF-8', $source);
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // iconv didn't work, try mb_convert_encoding
        // @see http://php.net/mbstring
        if(function_exists('mb_convert_encoding')) {
            $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // else
        $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                     "You may see strange artifacts, and mangled characters.",
                     E_USER_NOTICE);

        return array(xml_parser_create(), $source);
    }

    /**
     * @param string $enc  encoding name, any case
     * @return mixed       the upper-cased name if it is one of
     *                     $_KNOWN_ENCODINGS, else false
     */
    function known_encoding($enc) {
        $enc = strtoupper($enc);
        if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
            return $enc;
        }
        else {
            return false;
        }
    }

    /**
     * Report a problem and remember it on the object.
     *
     * The message goes to trigger_error() when MAGPIE_DEBUG is defined and
     * true, otherwise to error_log(). Notice-level messages are stored in
     * $this->WARNING, everything else in $this->ERROR.
     *
     * @param string $errormsg  message text
     * @param int    $lvl       PHP error level, defaults to E_USER_WARNING
     */
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) and $php_errormsg ) {
            $errormsg .= " ($php_errormsg)";
        }
        // the constant is defined outside this file; treat "not defined"
        // as "debugging off" instead of failing on an unknown constant
        if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }

        $notices = E_USER_NOTICE|E_NOTICE;
        if ( $lvl&$notices ) {
            $this->WARNING = $errormsg;
        } else {
            $this->ERROR = $errormsg;
        }
    }


} // end class RSS

/**
 * Format one attribute as name="value"; used to flatten markup that is
 * inlined in Atom content constructs. The value is inserted as-is.
 *
 * @param string $k  attribute name
 * @param string $v  attribute value
 * @return string
 */
function map_attrs($k, $v) {
    return "$k=\"$v\"";
}


?>
diff --git a/rss/magpie/rss_utils.inc b/rss/magpie/rss_utils.inc new file mode 100644 index 0000000000..2a29e72a96 --- /dev/null +++ b/rss/magpie/rss_utils.inc @@ -0,0 +1,67 @@
<?php
/*
 * rss_utils.inc - MagpieRSS utility functions
 *
 * Version:     0.51
 * License:     GPL
 *
 * The latest version of MagpieRSS can be obtained from:
 * http://magpierss.sourceforge.net
 *
 * For questions, help, comments, discussion, etc., please join the
 * Magpie mailing list:
 * magpierss-general@lists.sourceforge.net
 */


/*======================================================================*\
    Function: parse_w3cdtf
    Purpose:  parse a W3CDTF date into unix epoch
    Input:    $date_str - date such as 2005-01-23T15:47:31+01:00,
              seconds and timezone designator optional
    Output:   unix epoch (a missing timezone is treated as GMT),
              or -1 if the string does not match

    NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/

function parse_w3cdtf ( $date_str ) {

    # regex to match wc3dtf
    # groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":SS",
    #         7 seconds, 8 tz sign, 9 tz hours, 10 tz minutes, 11 "Z"
    $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if ( !preg_match( $pat, $date_str, $match ) ) {
        return -1;
    }

    # optional groups that did not take part in the match may be missing
    # from $match altogether, so test before reading them
    $seconds = ( isset($match[7]) and $match[7] !== '' ) ? (int) $match[7] : 0;

    # calc epoch for current date assuming GMT
    $epoch = gmmktime( (int) $match[4], (int) $match[5], $seconds,
                       (int) $match[2], (int) $match[3], (int) $match[1] );

    $offset = 0;
    $is_zulu = ( isset($match[11]) and $match[11] == 'Z' );

    # zulu time, aka GMT, needs no correction
    if ( !$is_zulu and isset($match[8]) and $match[8] !== '' ) {
        $tz_hour = (int) $match[9];
        $tz_min  = (int) $match[10];

        $offset = (($tz_hour*60)+$tz_min)*60;

        # is timezone ahead of GMT?  then subtract offset
        #
        if ( $match[8] == '+' ) {
            $offset = $offset * -1;
        }
    }

    return $epoch + $offset;
}

?>