5.x-1.x branch
Parse the incoming URL with SimpleXML then provide a data structure of the feed. Requires PHP5 because of SimpleXML.
| Name | Description |
|---|---|
| parser_common_syndication_feedapi_feed | Implementation of hook_feedapi_feed(). |
| parser_common_syndication_help | Implementation of hook_help(). |
| parser_common_syndication_nodeapi | Delete cache validating functions when feed is deleted |
| parser_common_syndication_requirements | Implementation of hook_requirements(). |
| _parser_common_syndication_atom10_parse | Parse atom feeds. |
| _parser_common_syndication_cache_get | Get the cached version of the <var>$url</var> |
| _parser_common_syndication_cache_set | Store the parsed feed into the cache |
| _parser_common_syndication_download | Call one of the possible feedapi_get hook and pass back the downloaded data |
| _parser_common_syndication_feedapi_get | Get the content from the given URL. |
| _parser_common_syndication_feedapi_parse | Parse the feed into a data structure. |
| _parser_common_syndication_feed_format_detect | Determine the feed format of a SimpleXML parsed object structure. |
| _parser_common_syndication_link | Extract the link that points to the original content (back to site or origi |
| _parser_common_syndication_parse_date | Parse a date that comes from a feed. |
| _parser_common_syndication_parse_w3cdtf | Parse the W3C date/time format, a subset of ISO 8601. |
| _parser_common_syndication_RDF10_parse | Parse RSS1.0/RDF feeds. |
| _parser_common_syndication_RSS20_parse | Parse RSS2.0 feeds. |
| _parser_common_syndication_sanitize_cache | Set the default caching directory if the current setting is not useable |
| _parser_common_syndication_title | Prepare raw data to be a title |
- <?php
-
- /**
- * @file
- * Parse the incoming URL with SimpleXML then provide a data structure of the feed.
- * Requires PHP5 because of SimpleXML.
- */
-
/**
 * Implementation of hook_help().
 *
 * @param $section
 *   The help section being requested.
 * @return
 *   The translated module description for the modules page, the translated
 *   full parser name for FeedAPI, or nothing for any other section.
 */
function parser_common_syndication_help($section) {
  // Short description shown on the module administration page.
  if ($section == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules. Only PHP5-compatible. Rather fast.');
  }
  // Human readable parser name requested by FeedAPI.
  if ($section == 'feedapi/full_name') {
    return t('Parser Common Syndication - only for PHP5');
  }
}
-
/**
 * Implementation of hook_feedapi_feed().
 *
 * @param $op
 *   The requested operation:
 *   - 'type': list the feed types this parser handles.
 *   - 'compatible': tell whether the feed object passed as second argument
 *     can be parsed by this module.
 *   - 'parse': parse the feed object passed as second argument.
 * @return
 *   - 'type': array of type names.
 *   - 'compatible': the type name if the feed is parseable, FALSE otherwise.
 *   - 'parse': the parsed feed object, or FALSE on failure.
 */
function parser_common_syndication_feedapi_feed($op) {
  $args = func_get_args();
  // This module is only able to parse one type, so there is nothing to choose from.
  $types = array("XML feed");
  $feed = (isset($args[1]) && is_object($args[1])) ? $args[1] : FALSE;

  switch ($op) {
    case 'type':
      return $types;

    case 'compatible':
      if (!function_exists('simplexml_load_string')) {
        return FALSE;
      }
      // Without a feed object carrying a URL there is nothing to download.
      if ($feed === FALSE || empty($feed->url)) {
        return FALSE;
      }
      $url = $feed->url;
      $downloaded_string = _parser_common_syndication_download($url, $op);
      // An object means the already parsed feed came back from the cache.
      if (is_object($downloaded_string)) {
        return $types[0];
      }
      if (!defined('LIBXML_VERSION') || (version_compare(phpversion(), '5.1.0', '<'))) {
        @ $xml = simplexml_load_string($downloaded_string, NULL);
      }
      else {
        @ $xml = simplexml_load_string($downloaded_string, NULL, LIBXML_NOERROR | LIBXML_NOWARNING);
      }
      if (_parser_common_syndication_feed_format_detect($xml) == FALSE) {
        return FALSE;
      }
      // The parser is compatible. The feed has to be parsed and cached now,
      // because the download step may already have stored the ETag data
      // (depends on the webserver).
      $parsed_feed = _parser_common_syndication_feedapi_parse($xml);
      $from_cache = is_object($parsed_feed) && isset($parsed_feed->from_cache) && $parsed_feed->from_cache === TRUE;
      if (is_object($parsed_feed) && !$from_cache) {
        _parser_common_syndication_cache_set($url, $parsed_feed);
      }
      return $types[0];

    case 'parse':
      // Refuse to parse when no feed object was supplied.
      if ($feed === FALSE) {
        return FALSE;
      }
      $parsed_feed = _parser_common_syndication_feedapi_parse($feed);
      $from_cache = is_object($parsed_feed) && isset($parsed_feed->from_cache) && $parsed_feed->from_cache === TRUE;
      if (is_object($parsed_feed) && !$from_cache && isset($feed->url)) {
        _parser_common_syndication_cache_set($feed->url, $parsed_feed);
      }
      return $parsed_feed;
  }
}
-
/**
 * Implementation of hook_requirements().
 *
 * Reports whether PHP 5 with the SimpleXML extension is available.
 *
 * @return
 *   A requirements array with a single 'Parser Common Syndication' entry.
 */
function parser_common_syndication_requirements() {
  // Every string goes through the function returned by get_t() so the same
  // translation callback is used for the whole entry.
  $t = get_t();
  $available = version_compare(PHP_VERSION, '5', '>=') && function_exists('simplexml_load_file');

  return array(
    'Parser Common Syndication' => array(
      'title' => $t('SimpleXML library.'),
      'description' => $t('A fast XML parsing library. (From PHP5)'),
      'severity' => $available ? REQUIREMENT_OK : REQUIREMENT_ERROR,
      'value' => $available ? $t('Available') : $t('Missing'),
    ),
  );
}
-
/**
 * Parse the feed into a data structure.
 *
 * @param $feed
 *   Either a feed object holding the URL in its url property, or an already
 *   loaded SimpleXMLElement.
 * @return
 *   The structured data extracted from the feed (stdClass), the cached feed
 *   object if the download step returned one, or FALSE if the feed cannot be
 *   downloaded or parsed.
 */
function _parser_common_syndication_feedapi_parse($feed) {
  if ($feed instanceof SimpleXMLElement) {
    $xml = $feed;
  }
  else {
    // Nothing can be downloaded without a feed object that carries a URL.
    if (!is_object($feed) || empty($feed->url)) {
      return FALSE;
    }

    $downloaded_string = _parser_common_syndication_download($feed->url, 'parse');
    // FALSE: the download failed. Object: the parsed feed came from the cache.
    if ($downloaded_string === FALSE || is_object($downloaded_string)) {
      return $downloaded_string;
    }

    if (!defined('LIBXML_VERSION') || (version_compare(phpversion(), '5.1.0', '<'))) {
      @ $xml = simplexml_load_string($downloaded_string, NULL);
    }
    else {
      @ $xml = simplexml_load_string($downloaded_string, NULL, LIBXML_NOERROR | LIBXML_NOWARNING);
    }

    // Got a malformed XML.
    if ($xml === FALSE || $xml == NULL) {
      return FALSE;
    }
  }

  // Dispatch to the parser matching the detected format.
  switch (_parser_common_syndication_feed_format_detect($xml)) {
    case "atom1.0":
      return _parser_common_syndication_atom10_parse($xml);

    case "RSS2.0":
    case "RSS0.91":
    case "RSS0.92":
      return _parser_common_syndication_RSS20_parse($xml);

    case "RDF":
      return _parser_common_syndication_RDF10_parse($xml);
  }
  return FALSE;
}
-
/**
 * Get the cached version of the <var>$url</var>
 *
 * @param $url
 *   The feed URL; its MD5 hash is the cache file name.
 * @return
 *   The unserialized cache content, or FALSE if there is no usable cache
 *   directory, no cache file, or the file cannot be read.
 */
function _parser_common_syndication_cache_get($url) {
  $cache_dir = _parser_common_syndication_sanitize_cache();
  // Without a cache directory the path below would point to the filesystem root.
  if ($cache_dir === FALSE) {
    return FALSE;
  }

  $cache_file = $cache_dir .'/'. md5($url);
  if (!file_exists($cache_file)) {
    return FALSE;
  }

  $file_content = file_get_contents($cache_file);
  if ($file_content === FALSE || $file_content === '') {
    return FALSE;
  }

  // NOTE(review): unserialize() trusts the file content. The file is written
  // by _parser_common_syndication_cache_set(), but anyone able to write into
  // the cache directory could inject objects here. Confirm the directory is
  // not writable by untrusted parties.
  return unserialize($file_content);
}
-
/**
 * Store the parsed feed into the cache
 *
 * The feed is serialized into a file named after the MD5 hash of the URL.
 * Nothing is written if the cache directory or the file is not usable.
 *
 * @param $url
 *   The feed URL.
 * @param $parsed_feed
 *   The parsed feed to store.
 */
function _parser_common_syndication_cache_set($url, $parsed_feed) {
  $cache_dir = _parser_common_syndication_sanitize_cache();
  if ($cache_dir === FALSE) {
    return;
  }

  $cache_fp = fopen($cache_dir .'/'. md5($url), 'w');
  // fopen() already reported the problem; do not write to an invalid handle.
  if ($cache_fp === FALSE) {
    return;
  }
  fwrite($cache_fp, serialize($parsed_feed));
  fclose($cache_fp);
}
-
/**
 * Get the content from the given URL.
 *
 * Sends a conditional request based on the stored ETag and Last-Modified
 * values, and stores the validators of the new response afterwards.
 *
 * @param $url
 *   A valid URL (not only web URLs).
 * @param $username
 *   If the URL uses authentication, the username.
 * @param $password
 *   If the URL uses authentication, the password.
 * @return
 *   The data pulled from the URL; the cached parsed feed object (with
 *   from_cache set to TRUE) if the server answered 304 Not Modified; FALSE
 *   if the response carried no data.
 */
function _parser_common_syndication_feedapi_get($url, $username = NULL, $password = NULL) {
  $method = 'GET';
  $follow = 3;
  $data = NULL;

  // Only download and parse data if it really needs a refresh, based on the
  // validators saved from the previous response.
  $headers = array();
  $validate = db_fetch_array(db_query("SELECT etag, last_modified FROM {parser_common_syndication} WHERE url = '%s'", $url));
  if (!empty($validate['etag'])) {
    $headers['If-None-Match'] = $validate['etag'];
  }
  if (!empty($validate['last_modified'])) {
    $headers['If-Modified-Since'] = $validate['last_modified'];
  }
  if (!empty($username)) {
    $headers['Authorization'] = 'Basic '. base64_encode("$username:$password");
  }

  $result = drupal_http_request($url, $headers, $method, $data, $follow);

  // Not modified: return the cached data.
  if (isset($result->code) && $result->code == 304) {
    $cached_data = _parser_common_syndication_cache_get($url);
    if (is_object($cached_data)) {
      $cached_data->from_cache = TRUE;
      return $cached_data;
    }
    // The cache file has to exist and contain good data. It does not, so
    // forget the validators and repeat the request without them.
    db_query("DELETE FROM {parser_common_syndication} WHERE url = '%s'", $url);
    return _parser_common_syndication_feedapi_get($url, $username, $password);
  }

  // The validator headers are optional; store empty values when absent.
  $etag = isset($result->headers['ETag']) ? $result->headers['ETag'] : '';
  $last_modified = isset($result->headers['Last-Modified']) ? $result->headers['Last-Modified'] : '';

  if (db_result(db_query("SELECT COUNT(*) FROM {parser_common_syndication} WHERE url = '%s'", $url)) == 0) {
    db_query("INSERT INTO {parser_common_syndication} (etag, last_modified, url) VALUES ('%s', '%s', '%s')", $etag, $last_modified, $url);
  }
  else {
    db_query("UPDATE {parser_common_syndication} SET etag = '%s', last_modified = '%s' WHERE url = '%s'", $etag, $last_modified, $url);
  }

  return isset($result->data) ? $result->data : FALSE;
}
/**
 * Implementation of hook_nodeapi().
 *
 * Deletes the stored cache validation data (ETag / Last-Modified row) and the
 * cache file of a feed when its node is deleted.
 *
 * @param $node
 *   The node being acted on. Only nodes with a feed URL are handled.
 * @param $op
 *   The node operation; only 'delete' is handled.
 */
function parser_common_syndication_nodeapi(&$node, $op) {
  // Both the database row and the cache file are keyed by the feed URL, so
  // there is nothing to clean up for nodes that do not carry one.
  if ($op != 'delete' || !isset($node->feed->url)) {
    return;
  }

  $url = $node->feed->url;
  db_query("DELETE FROM {parser_common_syndication} WHERE url = '%s'", $url);

  $cache_dir = _parser_common_syndication_sanitize_cache();
  if ($cache_dir === FALSE) {
    return;
  }
  $cache_filename = $cache_dir .'/'. md5($url);
  if (file_exists($cache_filename)) {
    unlink($cache_filename);
  }
}
-
/**
 * Determine the feed format of a SimpleXML parsed object structure.
 *
 * The decision is based on the lower-cased name of the root element, the
 * presence of entry/channel children and the version attribute.
 *
 * @param $xml
 *   SimpleXML-preprocessed feed.
 * @return
 *   One of "atom1.0", "RSS2.0", "RSS0.91", "RSS0.92" or "RDF"; FALSE if the
 *   structure is not compatible.
 */
function _parser_common_syndication_feed_format_detect($xml) {
  if (!is_object($xml)) {
    return FALSE;
  }

  $attributes = $xml->attributes();
  $root = strtolower($xml->getName());

  switch ($root) {
    case "feed":
      // Atom feeds are only recognized when they have at least one entry.
      return isset($xml->entry) ? "atom1.0" : FALSE;

    case "rdf":
      return isset($xml->channel) ? "RDF" : FALSE;

    case "rss":
      $version = $attributes["version"];
      if ($version == "2.0") {
        return "RSS2.0";
      }
      if ($version == "0.91") {
        return "RSS0.91";
      }
      if ($version == "0.92") {
        return "RSS0.92";
      }
      return FALSE;
  }
  return FALSE;
}
-
/**
 * Download the data behind a URL and pass it back, following feed
 * autodiscovery links when the URL points to an HTML page.
 *
 * @param $url
 *   The URL to download. Credentials embedded in the URL are used for HTTP
 *   basic authentication.
 * @param $op
 *   The FeedAPI operation on whose behalf the download happens. It is only
 *   handed on to the recursive call made for an autodiscovered feed.
 * @return
 *   string - the downloaded data with script-like tags removed,
 *   object - the parsed feed if it came from the cache,
 *   FALSE - if the URL is not reachable or returned nothing.
 */
function _parser_common_syndication_download($url, $op) {
  // Handle password protected feeds.
  $username = $password = NULL;
  if (valid_url($url, TRUE)) {
    $url_parts = parse_url($url);
    if (is_array($url_parts) && !empty($url_parts['user'])) {
      $username = $url_parts['user'];
      $password = isset($url_parts['pass']) ? $url_parts['pass'] : NULL;
    }
  }

  $downloaded_string = _parser_common_syndication_feedapi_get($url, $username, $password);

  // The data comes from cache, just pass one level upper.
  if (is_object($downloaded_string)) {
    return $downloaded_string;
  }
  // Cannot get the feed, pass the problem to one level upper.
  if ($downloaded_string == "") {
    return FALSE;
  }

  // Do the autodiscovery at this level, pass back the real data.
  // If it's not HTML, it is not worth looking into the downloaded string.
  if (strpos(strtolower($downloaded_string), "<html") !== FALSE) {
    $rss_link = _parser_common_syndication_discover_feed_link($downloaded_string, $url);
    if ($rss_link !== FALSE) {
      // The recursive call already returns filtered data, a cached object or
      // FALSE, so its result must be handed back untouched.
      return _parser_common_syndication_download($rss_link, $op);
    }
  }

  // Filter out strange tags. Without this, the text would contain strange stuff.
  // @todo: make sure that these are not important for feed element mapper
  $filtered = preg_replace(array('@<script[^>]*?.*?</script>@si', '@<object[^>]*?.*?</object>@si', '@<embed[^>]*?.*?</embed>@si', '@<applet[^>]*?.*?</applet>@si', '@<noframes[^>]*?.*?</noframes>@si', '@<noscript[^>]*?.*?</noscript>@si', '@<noembed[^>]*?.*?</noembed>@si'), '', $downloaded_string);
  // preg_replace() returns NULL on error; keep the unfiltered data then.
  if (is_string($filtered)) {
    $downloaded_string = $filtered;
  }

  // Ugly hack to be able to retrieve the xml:base property, no way to access xml:lang inside <feed>
  $rebased = preg_replace('/xml:base *=/', 'base=', $downloaded_string);
  return is_string($rebased) ? $rebased : $downloaded_string;
}

/**
 * Find the first usable feed link advertised by an HTML document.
 *
 * @param $html
 *   The HTML source.
 * @param $url
 *   The URL the HTML was downloaded from. Used to resolve relative links and
 *   to skip links pointing back to the page itself.
 * @return
 *   The absolute URL of the feed, or FALSE if no usable link was found.
 */
function _parser_common_syndication_discover_feed_link($html, $url) {
  $allowed_mime = array("text/xml", "application/rss+xml", "application/atom+xml", "application/rdf+xml", "application/xml");

  // Get all the link tags.
  $matches = array();
  preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $matches);

  foreach ($matches[1] as $link) {
    // Get the type attribute and check if the mime type is allowed.
    $mime = array();
    preg_match_all('/type\s*=\s*("|' ."'". ')([A-Za-z\/+]*)("|' ."'". ')/si', $link, $mime);
    $type = end($mime[2]);
    if (!is_string($type) || !in_array(strtolower($type), $allowed_mime)) {
      continue;
    }

    // Get the href attribute. Any character up to the closing quote is
    // accepted so URLs containing '-', '&', '%' or '~' are not rejected.
    $href = array();
    preg_match_all('/href\s*=\s*(["\'])(.*?)\1/si', $link, $href);
    $rss_link = end($href[2]);
    if (!is_string($rss_link)) {
      continue;
    }
    // Attribute values are HTML-encoded ("&amp;" stands for "&").
    $rss_link = trim(html_entity_decode($rss_link, ENT_QUOTES, 'UTF-8'));
    if ($rss_link === '' || $rss_link == $url) {
      continue;
    }

    $parsed_url = parse_url($rss_link);
    if (!is_array($parsed_url) || !isset($parsed_url['scheme']) || !isset($parsed_url['host'])) {
      // It's relative so make it absolute. Prefer the HTML base tag of the
      // document, fall back to the original URL.
      $base_url = $url;
      $base_tag = array();
      if (preg_match('/<base\s[^>]*?href\s*=\s*(["\'])(.*?)\1/si', $html, $base_tag) && valid_url($base_tag[2], TRUE)) {
        $base_url = $base_tag[2];
      }
      $rss_link = _parser_common_syndication_resolve_link($rss_link, $base_url);
    }

    if (is_string($rss_link) && $rss_link !== '' && $rss_link != $url) {
      return $rss_link;
    }
  }
  return FALSE;
}

/**
 * Turn a relative link into an absolute URL (simplified resolution).
 *
 * @param $link
 *   The relative link.
 * @param $base_url
 *   The absolute URL the link is relative to.
 * @return
 *   The absolute URL, or FALSE if $base_url has no scheme and host.
 */
function _parser_common_syndication_resolve_link($link, $base_url) {
  $base = parse_url($base_url);
  if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
    return FALSE;
  }
  $root = $base['scheme'] .'://'. $base['host'] . (isset($base['port']) ? ':'. $base['port'] : '');
  $base_path = isset($base['path']) ? $base['path'] : '/';

  // Protocol-relative link: only the scheme is missing.
  if (substr($link, 0, 2) == '//') {
    return $base['scheme'] .':'. $link;
  }
  // Link relative to the site root.
  if (substr($link, 0, 1) == '/') {
    return $root . $link;
  }
  // Query or fragment only: keep the path of the base document.
  if (substr($link, 0, 1) == '?' || substr($link, 0, 1) == '#') {
    return $root . $base_path . $link;
  }
  // Link relative to the directory of the base document.
  $directory = substr($base_path, 0, strrpos($base_path, '/') + 1);
  if ($directory === '' || $directory === FALSE) {
    $directory = '/';
  }
  return $root . $directory . $link;
}
-
/**
 * Parse atom feeds.
 *
 * @param $feed_XML
 *   The SimpleXMLElement of the feed root.
 * @return
 *   stdClass with title, description, options->link and items. Each item has
 *   title, description and options (original_author, timestamp,
 *   original_url, guid, tags, domains).
 */
function _parser_common_syndication_atom10_parse($feed_XML) {
  $parsed_source = new stdClass();

  // The "base" attribute is what the download step turned xml:base into.
  $base_attributes = $feed_XML->xpath("@base");
  $base = (is_array($base_attributes) && count($base_attributes) > 0) ? (string) $base_attributes[0] : '';
  if (!valid_url($base, TRUE)) {
    $base = FALSE;
  }

  // Detect the title
  $parsed_source->title = isset($feed_XML->title) ? _parser_common_syndication_title("{$feed_XML->title}") : "";
  // Detect the description
  $parsed_source->description = isset($feed_XML->subtitle) ? "{$feed_XML->subtitle}" : "";
  $parsed_source->options = new stdClass();

  $parsed_source->options->link = _parser_common_syndication_link($feed_XML->link);
  // A relative link is made absolute with the help of the base.
  if (valid_url($parsed_source->options->link) && !valid_url($parsed_source->options->link, TRUE) && !empty($base)) {
    $parsed_source->options->link = $base . $parsed_source->options->link;
  }

  $parsed_source->items = array();

  // The feed level author is the default for entries without an own author.
  $feed_author = !empty($feed_XML->author->name) ? "{$feed_XML->author->name}" : '';

  foreach ($feed_XML->entry as $news) {
    $guid = !empty($news->id) ? "{$news->id}" : NULL;

    // Categories and their schemes. Always initialized, so entries without
    // categories yield empty arrays.
    $additional_taxonomies = array(
      'ATOM Categories' => array(),
      'ATOM Domains' => array(),
    );
    if (isset($news->category)) {
      foreach ($news->category as $category) {
        $additional_taxonomies['ATOM Categories'][] = "{$category['term']}";
        if (isset($category['scheme'])) {
          $domain = "{$category['scheme']}";
          if (!empty($domain)) {
            if (!isset($additional_taxonomies['ATOM Domains'][$domain])) {
              $additional_taxonomies['ATOM Domains'][$domain] = array();
            }
            // Remember which category (by index) belongs to this domain.
            $additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1;
          }
        }
      }
    }

    $title = "{$news->title}";

    // Body: content is preferred over summary. Reset for every entry so the
    // body of the previous entry never leaks into this one.
    $body = '';
    $body_source = NULL;
    if (!empty($news->content)) {
      $body_source = $news->content;
    }
    else if (!empty($news->summary)) {
      $body_source = $news->summary;
    }
    if ($body_source !== NULL) {
      // Child elements (e.g. XHTML content) first, then the text content.
      foreach ($body_source->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$body_source}";
    }

    // Author: source author, then entry author, then feed author.
    $original_author = $feed_author;
    if (!empty($news->source->author->name)) {
      $original_author = "{$news->source->author->name}";
    }
    else if (!empty($news->author->name)) {
      $original_author = "{$news->author->name}";
    }

    // Original URL: the link element wins. Without one, fall back to the
    // content src attribute and then to the id if they are URLs.
    $original_url = _parser_common_syndication_link($news->link);
    if ($original_url === '') {
      // Some src attributes in some valid atom feeds contained no URLs at all.
      if (!empty($news->content['src']) && valid_url("{$news->content['src']}")) {
        $original_url = "{$news->content['src']}";
      }
      // I don't know how standard this is, but sometimes the id is the URL.
      else if (!empty($guid) && valid_url($guid, TRUE)) {
        $original_url = $guid;
      }
    }

    // Date: the first non-empty of these elements is used.
    $date = '';
    foreach (array('published', 'issued', 'updated', 'modified') as $date_field) {
      if (!empty($news->$date_field)) {
        $date = "{$news->$date_field}";
        break;
      }
    }

    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    $item->options->timestamp = _parser_common_syndication_parse_date($date);
    $item->options->original_url = $original_url;
    if (valid_url($item->options->original_url) && !valid_url($item->options->original_url, TRUE) && !empty($base)) {
      $item->options->original_url = $base . $item->options->original_url;
    }
    $item->options->guid = $guid;
    $item->options->tags = $additional_taxonomies['ATOM Categories'];
    $item->options->domains = $additional_taxonomies['ATOM Domains'];
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}
-
/**
 * Parse RSS1.0/RDF feeds.
 *
 * @param $feed_XML
 *   The SimpleXMLElement of the feed root.
 * @return
 *   stdClass with title, description, options->link and items. Each item has
 *   title, description and options (original_author, timestamp,
 *   original_url, guid, tags).
 */
function _parser_common_syndication_RDF10_parse($feed_XML) {
  $parsed_source = new stdClass();
  // Detect the title.
  $parsed_source->title = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";
  // Detect the description.
  $parsed_source->description = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";
  $parsed_source->options = new stdClass();
  // Detect the link.
  $parsed_source->options->link = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source->items = array();

  // Set category splitter (space is for del.icio.us feed).
  $category_splitter = ' ';

  // Get the default original author: the channel title, if there is one.
  $oa = !empty($feed_XML->channel->title) ? "{$feed_XML->channel->title}" : '';

  // Get all namespaces. Versions prior 5.1.2 don't allow namespaces.
  $legacy_php = version_compare(phpversion(), '5.1.2', '<');
  if ($legacy_php) {
    $namespaces = array('default' => NULL);
  }
  else {
    $namespaces = $feed_XML->getNamespaces(TRUE);
  }

  foreach ($feed_XML->item as $news) {
    // Initialization.
    $guid = $original_url = NULL;
    $title = $body = $pub_date = $dc_date = '';
    $categories = array();
    $original_author = $oa;

    foreach ($namespaces as $ns_link) {
      // Get about attribute as guid.
      foreach ($news->attributes($ns_link) as $name => $value) {
        if ($name == 'about') {
          $guid = "{$value}";
        }
      }

      // Get children for current namespace.
      if ($legacy_php) {
        $ns = (array) $news;
      }
      else {
        $ns = (array) $news->children($ns_link);
      }

      // Title
      if (!empty($ns['title'])) {
        $title = "{$ns['title']}";
      }

      // Description or dc:description
      if (!empty($ns['description']) && $body == '') {
        $body = "{$ns['description']}";
      }

      // Link
      if (!empty($ns['link'])) {
        $original_url = "{$ns['link']}";
      }

      // dc:creator
      if (!empty($ns['creator'])) {
        $original_author = "{$ns['creator']}";
      }

      // content:encoded
      if (!empty($ns['encoded'])) {
        $body = "{$ns['encoded']}";
      }

      // The date has to be collected here: after the loop only the children
      // of the last namespace would be visible.
      if (!empty($ns['pubDate'])) {
        $pub_date = "{$ns['pubDate']}";
      }
      if (!empty($ns['date'])) {
        $dc_date = "{$ns['date']}";
      }

      // dc:subject
      if (!empty($ns['subject'])) {
        // There can be multiple category tags.
        if (is_array($ns['subject'])) {
          foreach ($ns['subject'] as $cat) {
            if (is_object($cat)) {
              $categories[] = trim(strip_tags($cat->asXML()));
            }
            else {
              $categories[] = $cat;
            }
          }
        }
        else {
          // Or a single tag holding a list of categories.
          $categories = explode($category_splitter, "{$ns['subject']}");
        }
      }
    }

    // The description is not mandatory so use title if description not present.
    if (empty($body)) {
      $body = $title;
    }

    // If there are no link tag but rdf:about is provided.
    if (empty($original_url) && !empty($guid)) {
      $original_url = $guid;
    }

    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    // pubDate is preferred, the date element is the fallback.
    $item->options->timestamp = _parser_common_syndication_parse_date($pub_date !== '' ? $pub_date : $dc_date);
    $item->options->original_url = $original_url;
    $item->options->guid = $guid;
    $item->options->tags = $categories;
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}
-
/**
 * Parse RSS2.0 feeds.
 *
 * Also used for RSS 0.91 and 0.92 feeds.
 *
 * @param $feed_XML
 *   The SimpleXMLElement of the feed root.
 * @return
 *   stdClass with title, description, options->link and items. Each item has
 *   title, description and options (original_author, timestamp,
 *   original_url, guid, tags, domains).
 */
function _parser_common_syndication_RSS20_parse($feed_XML) {
  $parsed_source = new stdClass();
  // Detect the title.
  $parsed_source->title = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";
  // Detect the description.
  $parsed_source->description = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";
  $parsed_source->options = new stdClass();
  // Detect the link.
  $parsed_source->options->link = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source->items = array();

  // The channel title serves as the author of every item.
  $original_author = !empty($feed_XML->channel->title) ? "{$feed_XML->channel->title}" : '';

  // xpath() returns FALSE on error; treat that as a feed without items.
  $items = $feed_XML->xpath('//item');
  if (!is_array($items)) {
    $items = array();
  }

  foreach ($items as $item_XML) {
    // Keep the category elements as objects: their domain attribute is
    // needed below and would be lost by the array cast.
    $categories = $item_XML->xpath('category');
    if (!is_array($categories)) {
      $categories = array();
    }

    // for PHP > 5.1.2 get 'content' namespace
    $content = array();
    if (version_compare(phpversion(), '5.1.2', '>')) {
      $content = (array) $item_XML->children('http://purl.org/rss/1.0/modules/content/');
    }
    $news = (array) $item_XML;

    // Normalise the guid to a plain string, so the item stays serializable
    // however SimpleXML represented the element.
    $guid = NULL;
    if (isset($news['guid'])) {
      $raw_guid = is_array($news['guid']) ? reset($news['guid']) : $news['guid'];
      $guid = "{$raw_guid}";
    }

    $title = isset($news['title']) ? "{$news['title']}" : '';

    // Body: the first non-empty of description, content:encoded and title.
    // Some sources use content:encoded as description i.e. PostNuke
    // PageSetter module.
    $body = '';
    if (isset($news['description'])) {
      $body = "{$news['description']}";
    }
    if ($body === '' && isset($news['encoded'])) {
      // content:encoded for PHP < 5.1.2.
      $body = "{$news['encoded']}";
    }
    if ($body === '' && isset($content['encoded'])) {
      // content:encoded for PHP >= 5.1.2.
      $body = "{$content['encoded']}";
    }
    if ($body === '') {
      $body = $title;
    }

    $original_url = !empty($news['link']) ? "{$news['link']}" : NULL;

    $additional_taxonomies = array(
      'RSS Categories' => array(),
      'RSS Domains' => array(),
    );
    foreach ($categories as $category) {
      $additional_taxonomies['RSS Categories'][] = "{$category}";
      if (isset($category['domain'])) {
        $domain = "{$category['domain']}";
        if (!empty($domain)) {
          if (!isset($additional_taxonomies['RSS Domains'][$domain])) {
            $additional_taxonomies['RSS Domains'][$domain] = array();
          }
          // Remember which category (by index) belongs to this domain.
          $additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1;
        }
      }
    }

    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    // pubDate is optional; an empty string makes the date parser fall back
    // to the current time.
    $item->options->timestamp = _parser_common_syndication_parse_date(isset($news['pubDate']) ? "{$news['pubDate']}" : '');
    $item->options->original_url = $original_url;
    $item->options->guid = $guid;
    $item->options->tags = $additional_taxonomies['RSS Categories'];
    $item->options->domains = $additional_taxonomies['RSS Domains'];
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}
-
/**
 * Get the cache directory, creating it if it does not exist yet.
 *
 * The directory is 'parser_common_syndication_cache' under the path returned
 * by file_directory_path().
 *
 * @return
 *   The path of the writable cache directory, or FALSE if it is not usable.
 */
function _parser_common_syndication_sanitize_cache() {
  $cache_location = file_directory_path() .'/parser_common_syndication_cache';
  if (is_dir($cache_location) && is_writable($cache_location)) {
    return $cache_location;
  }

  $cache_location = file_create_path($cache_location);
  // Stop here rather than calling mkdir() with an empty path.
  if (!$cache_location) {
    return FALSE;
  }
  if (!file_exists($cache_location) && is_writable(file_directory_path())) {
    mkdir($cache_location);
  }
  // Whatever happened above, only a writable directory is usable as cache.
  if (!is_dir($cache_location) || !is_writable($cache_location)) {
    return FALSE;
  }
  return $cache_location;
}
-
/**
 * Parse a date that comes from a feed.
 *
 * strtotime() is tried first; if it fails, the string is handed to the
 * W3C date/time parser.
 *
 * @param $date_str
 *   The date string in various formats.
 * @return
 *   The timestamp of the string or the current time if it can't be parsed.
 */
function _parser_common_syndication_parse_date($date_str) {
  $timestamp = strtotime($date_str);

  // FALSE and -1 both mean that strtotime() failed.
  $unparsed = ($timestamp === FALSE || $timestamp == -1);
  if ($unparsed) {
    $timestamp = _parser_common_syndication_parse_w3cdtf($date_str);
  }

  if ($timestamp === FALSE) {
    return time();
  }
  return $timestamp;
}
-
/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Seconds, fractional seconds and the timezone designator are optional. A
 * date without timezone designator is taken as GMT.
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function _parser_common_syndication_parse_w3cdtf($date_str) {
  // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
  // 7 timezone sign, 8 timezone hours, 9 timezone minutes, 10 'Z'.
  $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';
  $match = array();
  if (!preg_match($pattern, (string) $date_str, $match)) {
    return FALSE;
  }

  // Trailing optional groups are missing from $match when they did not take
  // part in the match.
  $seconds = (isset($match[6]) && $match[6] !== '') ? (int) $match[6] : 0;

  // Calculate the epoch for current date assuming GMT.
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // A numeric offset was given (Z is zulu time, aka GMT, and needs none).
  if (!empty($match[7])) {
    $offset_secs = (((int) $match[8] * 60) + (int) $match[9]) * 60;
    // Is timezone ahead of GMT? If yes, subtract offset.
    if ($match[7] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }
  return $epoch;
}
-
/**
 * Extract the link that points to the original content.
 *
 * The href of the first "alternate" link is returned. A link without rel
 * attribute counts as "alternate" if it has an href. If there is no such
 * link, the href of the last link is returned.
 *
 * @param $links
 *   Array (or iterable SimpleXML element list) of SimpleXML link objects.
 * @return
 *   The href of the chosen link, or an empty string if there is none.
 */
function _parser_common_syndication_link($links) {
  $to_link = '';
  // Nothing to iterate over (e.g. NULL was passed).
  if (!is_array($links) && !is_object($links)) {
    return $to_link;
  }

  foreach ($links as $link) {
    $attributes = $link->attributes();
    $to_link = isset($attributes["href"]) ? "{$attributes["href"]}" : "";

    if (isset($attributes["rel"])) {
      if ("{$attributes["rel"]}" == 'alternate') {
        break;
      }
    }
    else if ($to_link !== '') {
      // No rel attribute: the link is an implicit "alternate".
      break;
    }
  }
  return $to_link;
}
-
/**
 * Prepare raw data to be a title
 *
 * @param $title
 *   The raw title string.
 * @return
 *   The title without tags and with HTML entities (including both kinds of
 *   quotes) decoded to UTF-8 characters.
 */
function _parser_common_syndication_title($title) {
  // Markup is removed first, entities are decoded afterwards.
  $without_markup = strip_tags($title);
  $decoded = html_entity_decode($without_markup, ENT_QUOTES, 'UTF-8');
  return $decoded;
}
-