RE: PHP RSS help

Hello:

I have taken over a project from a co-worker, and the URL structure of my RSS feed has changed. The change is as follows:

The old RSS structure on which your current PHP is based looks like this:
http://www.khq.com/Global/category.asp?C=180510&clienttype=rss

The new structure looks like this:
http://www.khq.com/category/180510/local-news?clienttype=rss

The key changes are:

· The name of the category page or story page is now in the URL directly following the object ID number.

· The ampersand (&) that preceded the RSS clienttype parameter has been replaced with a question mark (?).

That being said, here is the code for the parser class:

<?php // Sets the correct locale setlocale(LC_ALL, 'en_US.UTF8'); class Parser { var $story_id_url = 'http://api.worldnow.com/feed/v2.0/categories/181615/stories'; //var $full_story_url ='http://www.khq.com/Global/story.asp?S={story_id}&clienttype=rssstory'; // var $full_story_url = 'http://www.khq.com/category/181615/local-news-only-for-fox-feed?S={story_id}clienttype=rssstory'; var $full_story_url= 'http://www.khq.com/category/181615/local-news-only-for-fox-feed?S={story_id}clienttype=rss'; '; var $full_story_url_id_placeholder = '{story_id}'; var $valid_image_extensions = array('jpg','jpeg','gif','png'); var $console = TRUE; /* * Perform the full fetching and parsing of the story ID's and * corresponding stories. */ function parse() { // Get the story ID's $story_ids = $this->get_story_ids(); // Get the stories $stories = $this->get_stories($story_ids); return $stories; } /* * Grabs the contents of the story headline feed and parses out the story * ID's for individual story parsing. */ function get_story_ids() { // Grab the raw data $raw_data = $this->get_url_contents($this->story_id_url); // Convert the raw data into objects $xml = new SimpleXMLElement($raw_data); // Create an array for the story ID's $story_ids = array(); // Grab each story ID from the raw data foreach($xml->story as $story) { $story_ids[] = $story->id; } // Delete the xml object unset($xml); // CONSOLE: Echo the story ID's if($this->console == TRUE) { echo "Story ID's retrieved: ".count($story_ids)."\n"; } // Return array of story ID's return $story_ids; } /* * Grabs all the individal stories from the array of story ID's that is * fed to this method. It outputs an array of the stories. 
*/ function get_stories($ids) { // Start stories array $stories = array(); // CONSOLE: Echo the console header and start counter if($this->console == TRUE) { echo "\n-------------------------------\n\nSTART RETRIEVING AND RENDERING STORIES\n\n"; $i=1; } // Process each story ID foreach($ids as $id) { // Generate the URL to pull the raw data $url = str_replace($this->full_story_url_id_placeholder,$id,$this->full_story_url); // Grab the raw data $raw_data = $this->get_url_contents($url); // Convert the raw data into objects $xml = new SimpleXMLElement($raw_data,LIBXML_NOCDATA); // CONSOLE: Echo the story details if($this->console == TRUE) { echo $i.' '.$id." - ".(string)$xml->channel->item->title."\n"; } // Put story contents into array $stories[] = array( 'title' => (string)$xml->channel->item->title, 'slug' => $this->generate_slug((string)$xml->channel->item->title), 'id' => (int)$id, 'category' => (string)$xml->channel->category, 'pubDate' => date( 'Y-m-d H:i:s', strtotime((string)$xml->channel->item->pubDate) ), 'story' => $this->render_story( (string)$xml->channel->item->description, $xml->channel->item->enclosure ), 'hash' => $this->generate_story_hash( (string)$xml->channel->item->title, (string)$xml->channel->item->pubDate, (string)$xml->channel->item->description ) ); // CONSOLE: Increment counter if($this->console == TRUE) { $i++; } } // CONSOLE: Print final results if($this->console == TRUE) { echo "\nSuccessfully retrieved ".($i-1)." 
stories!\n\n"; } return $stories; } /* * Render the assets from the story, including text and images */ function render_story($story_contents,$assets) { // Start the output array $output = array(); // Get the images $output['images'] = $this->render_story_assets($story_contents,$assets); // Echo the status of the stories if($this->console == TRUE) { echo " images: ".count($output['images'])."\n\n"; } // Get the story text $output['text'] = $this->render_story_text($story_contents); return $output; } /* * Render out the images from the story body and assets */ function render_story_assets($story_contents,$assets) { // Start output array $output = array(); // Isolate all the image tags in the story body preg_match_all('/(img|src)\=(\"|\')[^\"\'\>]+/i', $story_contents, $images); $data = preg_replace('/(img|src)(\"|\'|\=\"|\=\')(.*)/i',"$3",$images[0]); // Add link of image to output array if it's a valid image foreach($data as $url) { // Get the info for the file $info = pathinfo($url); // Check to make sure the extension is valid if(isset($info['extension'])) { if(in_array($info['extension'],$this->valid_image_extensions)) { // Extension is valid - Add it to the output array $output[] = (string)$url; } } } // Grab the images from the story xml assets if(!empty($assets)) { foreach($assets as $asset) { $output[] = (string)$asset['url']; } } return $output; } /* * Render out the text from the story, removing any images and extra * paragraphs, images, etc. */ function render_story_text($story_contents) { // Remove extra blank paragraphs $story_contents = str_replace('

 

','',$story_contents); // Remove extra line breaks $story_contents = str_replace("\n",'',$story_contents); // Remove HTML tags $story_contents = strip_tags($story_contents,'


Sponsor our Newsletter | Privacy Policy | Terms of Service