<?php
/**
 * Rss Parse Class ver0.1
 *
 * Minimal SAX-style RSS reader: downloads a feed, converts it to UTF-8 and
 * collects the channel fields, the items and the channel image URLs.
 *
 * Typical use:
 *   $rss = new RssParse();
 *   if ($rss->parseRss($url)) { $data = $rss->getAll(); }
 *
 * parseRss() does not clear previously collected results; call rssReset()
 * before parsing another feed with the same instance.
 *
 * @link http://www.ugia.cn/?p=42
 * @author: legend (PASiOcn@msn.com)
 * @version 0.1
 */
class RssParse
{
    // Lower-cased encoding name taken from the XML declaration of the last feed.
    var $encoding = "utf-8";
    // Feed location used when parseRss() is called without an argument.
    var $rssurl = "http://www.ugia.cn/wp-rss2.php";
    // Raw feed contents (converted to UTF-8 by parseRss() when possible).
    var $resource = "";
    // Lower-cased name of the element whose text is currently being collected.
    var $tag = "";
    // Parser position flags.
    var $insidechannel = false;
    var $insideitem = false;
    var $insideimage = false;
    // Item currently being assembled.
    var $item = array();
    // Channel fields: title, description, link, pubdate.
    var $channel = array();
    // Text buffer for the <url> of the image currently being read.
    var $image = "";
    // Completed items, each with title / description / link / pubdate keys as found.
    var $items = array();
    // Collected image URLs.
    var $images = array();

    /**
     * Clears all collected results and the transient parser state.
     */
    function rssReset()
    {
        $this->item = array();
        $this->channel = array();
        $this->image = "";
        $this->items = array();
        $this->images = array();
        $this->tag = "";
        $this->insidechannel = false;
        $this->insideitem = false;
        $this->insideimage = false;
    }

    /**
     * Loads the document at $this->rssurl into $this->resource.
     *
     * @return bool true when the source could be read, false otherwise
     */
    function getResource()
    {
        // Failure is reported through the return value, so the warning is suppressed.
        $data = @file_get_contents($this->rssurl);
        if ($data === false) {
            return false;
        }
        $this->resource = $data;
        return true;
    }

    /**
     * Reads the encoding named in the XML declaration of $this->resource
     * into $this->encoding (lower-cased); falls back to "utf-8".
     */
    function getEncoding()
    {
        $this->encoding = "utf-8";
        $pattern = '/<\?xml[^>]*?\sencoding\s*=\s*(["\'])([^"\']*)\1/i';
        if (preg_match($pattern, $this->resource, $result)) {
            $declared = strtolower(trim($result[2]));
            if ($declared !== '') {
                $this->encoding = $declared;
            }
        }
    }

    /**
     * Downloads and parses a feed, filling channel, items and images.
     *
     * Parsing is best-effort: a malformed document still yields true, with
     * whatever could be read before the error.
     *
     * @param string $rssurl feed location; when empty, $this->rssurl is used
     * @return bool false when the feed could not be fetched, true otherwise
     */
    function parseRss($rssurl = '')
    {
        if (!empty($rssurl)) {
            $this->rssurl = $rssurl;
        }
        if (!$this->getResource()) {
            return false;
        }

        $this->getEncoding();
        if ($this->encoding != "utf-8" && function_exists('iconv')) {
            $converted = @iconv($this->encoding, "UTF-8", $this->resource);
            if ($converted !== false) {
                // The bytes are UTF-8 now, so the declaration must say so too;
                // otherwise a parser that honours the declaration decodes twice.
                $rewritten = preg_replace(
                    '/(<\?xml[^>]*?\sencoding\s*=\s*)(["\'])[^"\']*\2/i',
                    '${1}${2}UTF-8${2}',
                    $converted,
                    1
                );
                $this->resource = ($rewritten === null) ? $converted : $rewritten;
            }
        }

        // Start from a clean parser position; collected results are kept.
        $this->tag = "";
        $this->insidechannel = false;
        $this->insideitem = false;
        $this->insideimage = false;
        $this->item = array();
        $this->image = "";

        $xml_parser = xml_parser_create("UTF-8");
        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_element_handler(
            $xml_parser,
            array($this, "startElement"),
            array($this, "endElement")
        );
        xml_set_character_data_handler($xml_parser, array($this, "characterData"));
        xml_parse($xml_parser, $this->resource, true);
        // Only resource-based parsers (PHP < 8) need an explicit free.
        if (is_resource($xml_parser)) {
            xml_parser_free($xml_parser);
        }

        if (count($this->channel) > 1) {
            $raw = isset($this->channel['pubdate']) ? $this->channel['pubdate'] : '';
            $stamp = $this->mystrtotime($raw);
            // No usable channel date: fall back to the first item's date.
            if (($stamp === false || $stamp <= 0) && isset($this->items[0]['pubdate'])) {
                $stamp = $this->items[0]['pubdate'];
            }
            $this->channel['pubdate'] = $stamp;
        }
        return true;
    }

    /**
     * @return array map with the keys 'channel', 'items' and 'images'
     */
    function getAll()
    {
        return array(
            'channel' => $this->channel,
            'items' => $this->items,
            'images' => $this->images
        );
    }

    /**
     * @return array collected channel fields
     */
    function getChannel()
    {
        return $this->channel;
    }

    /**
     * @return array list of collected items
     */
    function getItems()
    {
        return $this->items;
    }

    /**
     * @return array list of collected image URLs
     */
    function getImages()
    {
        return $this->images;
    }

    /**
     * XML start-tag handler: tracks the current element and container.
     */
    function startElement($parser, $name, $attrs)
    {
        if ($this->insideitem || $this->insideimage || $this->insidechannel) {
            $this->tag = strtolower($name);

            // Several date elements map to 'pubdate' (e.g. pubDate and
            // lastBuildDate). Only the first one with content is kept;
            // appending a second one would produce an unparsable string.
            $known = null;
            $dateTags = array("dc:date", "pubdate", "modified");
            if ($this->insideitem) {
                $known = isset($this->item['pubdate']) ? $this->item['pubdate'] : '';
            } elseif (!$this->insideimage) {
                $known = isset($this->channel['pubdate']) ? $this->channel['pubdate'] : '';
                $dateTags[] = "lastbuilddate";
            }
            if ($known !== null
                && trim((string) $known) !== ''
                && in_array($this->tag, $dateTags, true)
            ) {
                $this->tag = "";
            }
        }

        switch ($name) {
            case "channel":
                $this->insidechannel = true;
                break;
            case "item":
                $this->insideitem = true;
                break;
            case "image":
                $this->insideimage = true;
                break;
        }
    }

    /**
     * XML end-tag handler: finalises items, images and channel fields.
     */
    function endElement($parser, $name)
    {
        // Text that follows a closing tag (usually whitespace between
        // elements) must not be appended to the element just closed.
        $this->tag = "";

        if ($name == "channel") {
            foreach (array('title', 'description', 'link') as $key) {
                if (isset($this->channel[$key])) {
                    $this->channel[$key] = trim($this->channel[$key]);
                }
            }
            $this->insidechannel = false;
        } elseif ($name == "image") {
            $this->insideimage = false;
            $this->image = "";
        } elseif ($name == "url") {
            if ($this->insideimage) {
                $this->images[] = trim($this->image);
                $this->image = "";
            }
        } elseif ($name == "item") {
            foreach (array('title', 'link') as $key) {
                if (isset($this->item[$key])) {
                    $this->item[$key] = trim($this->item[$key]);
                }
            }

            $rawDate = isset($this->item['pubdate']) ? $this->item['pubdate'] : '';
            $this->item['pubdate'] = $this->mystrtotime($rawDate);

            $text = isset($this->item['description']) ? $this->item['description'] : '';
            $text = trim(strip_tags($text));
            // Drop non-breaking spaces left over from the HTML markup, both as
            // the entity and as the UTF-8 character; ordinary spaces are kept.
            $this->item['description'] = str_replace(array("&nbsp;", "\xC2\xA0"), "", $text);
            // No length truncation is applied to the description.

            $this->items[] = $this->item;
            $this->item = array();
            $this->insideitem = false;
        }
    }

    /**
     * XML character-data handler: appends text to the field selected by the
     * current element. May be called several times for one element.
     */
    function characterData($parser, $data)
    {
        if ($this->insideitem) {
            switch ($this->tag) {
                case "title":
                case "description":
                case "link":
                    $this->_append($this->item, $this->tag, $data);
                    break;
                case "dc:date":
                case "pubdate":
                case "modified":
                    $this->_append($this->item, 'pubdate', $data);
                    break;
            }
        } elseif ($this->insideimage) {
            // Only the URL of an image is collected; its own title and link
            // must not leak into the channel fields.
            if ($this->tag == "url") {
                $this->image .= $data;
            }
        } elseif ($this->insidechannel) {
            switch ($this->tag) {
                case "title":
                case "description":
                case "link":
                    $this->_append($this->channel, $this->tag, $data);
                    break;
                case "dc:date":
                case "pubdate":
                case "lastbuilddate":
                case "modified":
                    $this->_append($this->channel, 'pubdate', $data);
                    break;
            }
        }
    }

    /**
     * Internal helper: appends $data to $bucket[$key], creating the key first.
     */
    function _append(&$bucket, $key, $data)
    {
        if (!isset($bucket[$key])) {
            $bucket[$key] = "";
        }
        $bucket[$key] .= $data;
    }

    /**
     * Converts a feed date string to a Unix timestamp.
     *
     * There are too many date formats in the wild. Besides everything PHP's
     * strtotime() understands, one extra format is recognised as a fallback:
     * "YYYY-MM-DDTHH:MM:SS+HH:MM" (also with a negative offset). Other
     * formats are not handled.
     *
     * @param string $time date string taken from the feed
     * @return int|false timestamp, or strtotime()'s failure value
     */
    function mystrtotime($time)
    {
        $time = trim((string) $time);
        if ($time === '') {
            return false;
        }

        $curtime = strtotime($time);
        // Older PHP versions signal failure with -1, newer ones with false.
        if ($curtime === false || $curtime <= 0) {
            $pattern = '/(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})([+-])(\d{2}):(\d{2})/';
            if (preg_match($pattern, $time, $m)) {
                // Rewrite to "YYYY-MM-DD HH:MM:SS +HHMM".
                $curtime = strtotime($m[1] . ' ' . $m[2] . ' ' . $m[3] . $m[4] . $m[5]);
            }
        }
        return $curtime;
    }

    /**
     * Terminates the script, printing $msg.
     *
     * @param string $msg message to output before exiting
     */
    function getError($msg)
    {
        die($msg);
    }
}
?>