<?php /* RSS_PHP - the PHP DOM based RSS Parser Author: <rssphp.net> Published: 200801 :: blacknet :: via rssphp.net RSS_PHP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY. Usage: See the documentation at http://rssphp.net/documentation Examples: Can be found online at http://rssphp.net/examples */ class rss_php { public $document; public $channel; public $items; /**************************** public load methods ***/ # load RSS by URL public function load($url=false, $unblock=true) { if($url) { if($unblock) { $this->loadParser(file_get_contents($url, false, $this->randomContext())); } else { $this->loadParser(file_get_contents($url)); } } } # load raw RSS data public function loadRSS($rawxml=false) { if($rawxml) { $this->loadParser($rawxml); } } /**************************** public load methods @param $includeAttributes BOOLEAN return array; ***/ # return full rss array public function getRSS($includeAttributes=false) { if($includeAttributes) { return $this->document; } return $this->valueReturner(); } # return channel data public function getChannel($includeAttributes=false) { if($includeAttributes) { return $this->channel; } return $this->valueReturner($this->channel); } # return rss items public function getItems($includeAttributes=false) { if($includeAttributes) { return $this->items; } return $this->valueReturner($this->items); } /**************************** internal methods ***/ private function loadParser($rss=false) { if($rss) { $this->document = array(); $this->channel = array(); $this->items = array(); $DOMDocument = new DOMDocument; $DOMDocument->strictErrorChecking = false; $DOMDocument->loadXML($rss); $this->document = $this->extractDOM($DOMDocument->childNodes); } } private function valueReturner($valueBlock=false) { if(!$valueBlock) { $valueBlock = $this->document; } foreach($valueBlock as $valueName => $values) { if(isset($values['value'])) { $values = $values['value']; } if(is_array($values)) { $valueBlock[$valueName] = $this->valueReturner($values); } else { $valueBlock[$valueName] = $values; } } return $valueBlock; } private function extractDOM($nodeList,$parentNodeName=false) { $itemCounter = 0; foreach($nodeList as $values) { if(substr($values->nodeName,0,1) != '#') { if($values->nodeName == 'item') { $nodeName = $values->nodeName.':'.$itemCounter; $itemCounter++; } else { $nodeName = $values->nodeName; } $tempNode[$nodeName] = array(); if($values->attributes) { for($i=0;$values->attributes->item($i);$i++) { $tempNode[$nodeName]['properties'][$values->attributes->item($i)->nodeName] = $values->attributes->item($i)->nodeValue; } } if(!$values->firstChild) { $tempNode[$nodeName]['value'] = $values->textContent; } else { $tempNode[$nodeName]['value'] = $this->extractDOM($values->childNodes, $values->nodeName); } if(in_array($parentNodeName, array('channel','rdf:RDF'))) { if($values->nodeName == 'item') { $this->items[] = $tempNode[$nodeName]['value']; } elseif(!in_array($values->nodeName, array('rss','channel'))) { $this->channel[$values->nodeName] = $tempNode[$nodeName]; } } } elseif(substr($values->nodeName,1) == 'text') { $tempValue = trim(preg_replace('/\s\s+/',' ',str_replace("\n",' ', $values->textContent))); if($tempValue) { $tempNode = $tempValue; } } elseif(substr($values->nodeName,1) == 'cdata-section'){ $tempNode = $values->textContent; } } return $tempNode; } private function randomContext() { $headerstrings = array(); $headerstrings['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.'.rand(0,2).'; en-US; rv:1.'.rand(2,9).'.'.rand(0,4).'.'.rand(1,9).') Gecko/2007'.rand(10,12).rand(10,30).' Firefox/2.0.'.rand(0,1).'.'.rand(1,9); $headerstrings['Accept-Charset'] = rand(0,1) ? 'en-gb,en;q=0.'.rand(3,8) : 'en-us,en;q=0.'.rand(3,8); $headerstrings['Accept-Language'] = 'en-us,en;q=0.'.rand(4,6); $setHeaders = 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'."\r\n". 'Accept-Charset: '.$headerstrings['Accept-Charset']."\r\n". 'Accept-Language: '.$headerstrings['Accept-Language']."\r\n". 'User-Agent: '.$headerstrings['User-Agent']."\r\n"; $contextOptions = array( 'http'=>array( 'method'=>"GET", 'header'=>$setHeaders ) ); return stream_context_create($contextOptions); } } ?>