Home > php教程 > php手册 > body text

很好用的php rss解析类

WBOY
Release: 2016-06-21 09:06:26
Original
945 people have browsed it

rss

/**
* Rss Parse Class ver0.1
*
* @link http://www.ugia.cn/?p=42
* @author: legend (PASiOcn@msn.com)
* @version 0.1
*/

/**
 * Minimal RSS reader built on PHP's event-based XML parser (ext/xml).
 *
 * Usage: call parseRss($url), then read the result through getChannel(),
 * getItems(), getImages() or getAll().
 *
 * Result shape:
 *  - channel: title, description, link (raw text) and pubdate (Unix timestamp,
 *    or false when no date could be parsed); keys are present only when the
 *    feed supplied them.
 *  - items:   list of arrays with title, link (raw text, when supplied),
 *    description (tags stripped) and pubdate (Unix timestamp or false).
 *  - images:  list of URLs taken from <image><url>...</url></image>.
 */
class RssParse
{
    /** Lower-cased encoding declared by the feed; "utf-8" when none is declared. */
    public $encoding      = "utf-8";
    /** Location of the feed; anything fopen() can read. */
    public $rssurl        = "http://www.ugia.cn/wp-rss2.php";

    /** Raw feed document (converted to UTF-8 by parseRss() when necessary). */
    public $resource      = "";
    /** Lower-cased name of the element whose text is currently being collected. */
    public $tag           = "";

    public $insidechannel = false;
    public $insideitem    = false;
    public $insideimage   = false;

    /** Item currently being assembled. */
    public $item          = array();
    public $channel       = array();
    /** Image URL currently being assembled. */
    public $image         = "";

    public $items         = array();
    public $images        = array();

    /** Element name => item field it is stored under. */
    private $itemFields = array(
        'title'       => 'title',
        'description' => 'description',
        'link'        => 'link',
        'dc:date'     => 'pubdate',
        'pubdate'     => 'pubdate',
        'modified'    => 'pubdate',
    );

    /** Element name => channel field it is stored under. */
    private $channelFields = array(
        'title'         => 'title',
        'description'   => 'description',
        'link'          => 'link',
        'dc:date'       => 'pubdate',
        'pubdate'       => 'pubdate',
        'lastbuilddate' => 'pubdate',
        'modified'      => 'pubdate',
    );

    /**
     * Clears every piece of parse state so the object can be reused for another feed.
     * The downloaded document ($resource) and the configured URL are left untouched.
     */
    public function rssReset()
    {
        $this->tag           = "";
        $this->insidechannel = false;
        $this->insideitem    = false;
        $this->insideimage   = false;

        $this->item    = array();
        $this->channel = array();
        $this->image   = "";
        $this->items   = array();
        $this->images  = array();
    }

    /**
     * Reads the document at $rssurl into $resource.
     *
     * @return bool true when the document was read, false when it could not be opened or read
     */
    public function getResource()
    {
        // Failure is reported through the return value, so the warning is suppressed on purpose.
        $fp = @fopen($this->rssurl, "rb");

        if (!is_resource($fp)) {
            return false;
        }

        $body = stream_get_contents($fp);
        fclose($fp);

        if ($body === false) {
            return false;
        }

        $this->resource = $body;

        return true;
    }

    /**
     * Detects the encoding named in the XML declaration of $resource and stores it,
     * lower-cased, in $encoding. Falls back to "utf-8" when nothing is declared.
     */
    public function getEncoding()
    {
        $this->encoding = "utf-8";

        if (preg_match('/<\?xml[^>]*?\sencoding\s*=\s*(["\'])(.*?)\1/i', (string) $this->resource, $result)
            && $result[2] !== ''
        ) {
            $this->encoding = strtolower($result[2]);
        }
    }

    /**
     * Downloads and parses a feed, replacing any previously parsed result.
     *
     * @param string $rssurl feed location; when empty the current $rssurl is used
     * @return bool false when the feed could not be read or converted to UTF-8, true otherwise.
     *              Malformed XML is parsed on a best-effort basis and still yields true.
     */
    public function parseRss($rssurl = '')
    {
        if (!empty($rssurl)) {
            $this->rssurl = $rssurl;
        }

        if (!$this->getResource()) {
            return false;
        }

        // Start from a clean slate so results of an earlier call are not mixed in.
        $this->rssReset();
        $this->getEncoding();

        if ($this->encoding != "utf-8") {
            $converted = iconv($this->encoding, "UTF-8", $this->resource);
            if ($converted === false) {
                return false;
            }

            // The bytes are UTF-8 now, so the declaration must say so too; otherwise the
            // parser would decode the document a second time using the old encoding.
            $redeclared = preg_replace(
                '/(<\?xml[^>]*?\sencoding\s*=\s*)(["\'])[^"\']*\2/i',
                '${1}${2}UTF-8${2}',
                $converted,
                1
            );
            $this->resource = ($redeclared === null) ? $converted : $redeclared;
        }

        $xml_parser = xml_parser_create("UTF-8");

        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
        // Array callables bind the handlers to this object without xml_set_object().
        xml_set_element_handler($xml_parser, array($this, "startElement"), array($this, "endElement"));
        xml_set_character_data_handler($xml_parser, array($this, "characterData"));

        xml_parse($xml_parser, $this->resource, true);

        // Only legacy resource-based parsers need explicit freeing.
        if (is_resource($xml_parser)) {
            xml_parser_free($xml_parser);
        }

        if (count($this->channel) > 1) {
            $rawDate = isset($this->channel['pubdate']) ? $this->channel['pubdate'] : '';
            $stamp   = $this->mystrtotime($rawDate);

            // No usable channel date: fall back to the date of the first item.
            if (($stamp === false || $stamp <= 0) && isset($this->items[0]['pubdate'])) {
                $stamp = $this->items[0]['pubdate'];
            }

            $this->channel['pubdate'] = $stamp;
        }

        return true;
    }

    /**
     * @return array the whole parse result, keyed 'channel', 'items' and 'images'
     */
    public function getAll()
    {
        return array(
            'channel' => $this->channel,
            'items'   => $this->items,
            'images'  => $this->images,
        );
    }

    /**
     * @return array channel fields collected by the last parseRss() call
     */
    public function getChannel()
    {
        return $this->channel;
    }

    /**
     * @return array list of items collected by the last parseRss() call
     */
    public function getItems()
    {
        return $this->items;
    }

    /**
     * @return array list of image URLs collected by the last parseRss() call
     */
    public function getImages()
    {
        return $this->images;
    }

    /**
     * XML parser callback: an element was opened.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   element name as written in the document
     * @param array  $attrs  element attributes (unused)
     */
    public function startElement($parser, $name, $attrs)
    {
        $name = strtolower($name);

        if ($this->insideitem || $this->insideimage || $this->insidechannel) {
            $this->tag = $name;
        }

        switch ($name) {
            case "channel":
                $this->insidechannel = true;
                break;
            case "item":
                $this->insideitem = true;
                break;
            case "image":
                $this->insideimage = true;
                break;
        }

        // Several elements map onto 'pubdate' (pubDate, lastBuildDate, dc:date, ...).
        // Keep the first one only; concatenating two dates gives an unparsable string.
        if ($this->insideitem) {
            $this->ignoreRepeatedDate($this->itemFields, $this->item);
        } elseif ($this->insidechannel && !$this->insideimage) {
            $this->ignoreRepeatedDate($this->channelFields, $this->channel);
        }
    }

    /**
     * XML parser callback: an element was closed.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   element name as written in the document
     */
    public function endElement($parser, $name)
    {
        $name = strtolower($name);

        // Text between elements (indentation, newlines) belongs to no field.
        $this->tag = "";

        if ($name == "channel") {
            $this->insidechannel = false;
        } elseif ($name == "image") {
            $this->insideimage = false;
            $this->image       = "";
        } elseif ($name == "url") {
            if ($this->insideimage) {
                $this->images[] = trim($this->image);
                $this->image    = "";
            }
        } elseif ($name == "item") {
            $rawDate = isset($this->item['pubdate']) ? $this->item['pubdate'] : '';
            $rawText = isset($this->item['description']) ? $this->item['description'] : '';

            $this->item['pubdate'] = $this->mystrtotime($rawDate);

            // Plain-text description: drop markup and non-breaking spaces
            // (entity form and UTF-8 byte form), keep ordinary spaces.
            $plain = trim(strip_tags($rawText));
            $this->item['description'] = str_replace(array("&nbsp;", "\xC2\xA0"), "", $plain);

            $this->items[]    = $this->item;
            $this->item       = array();
            $this->insideitem = false;
        }
    }

    /**
     * XML parser callback: a chunk of text inside the current element.
     * May be invoked several times for one element, so text is appended.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $data   text chunk
     */
    public function characterData($parser, $data)
    {
        if ($this->insideitem) {
            if (isset($this->itemFields[$this->tag])) {
                $this->appendText($this->item, $this->itemFields[$this->tag], $data);
            }
        } elseif ($this->insideimage) {
            // Only the URL of an <image> is kept; its title/link must not leak into the channel.
            if ($this->tag == "url") {
                $this->image .= $data;
            }
        } elseif ($this->insidechannel) {
            if (isset($this->channelFields[$this->tag])) {
                $this->appendText($this->channel, $this->channelFields[$this->tag], $data);
            }
        }
    }

    /**
     * Converts a feed date into a Unix timestamp.
     *
     * Feeds use many date formats. Everything strtotime() understands is accepted;
     * in addition, "YYYY-MM-DDTHH:MM:SS+HH:MM" is rewritten to
     * "YYYY-MM-DD HH:MM:SS +HHMM" for PHP versions whose strtotime() rejects it.
     * Other formats are not handled.
     *
     * @param string $time date text taken from the feed
     * @return int|false timestamp, or false when the text is empty or cannot be parsed
     *                   (very old PHP versions report -1 instead of false)
     */
    public function mystrtotime($time)
    {
        $time = trim((string) $time);
        if ($time === '') {
            return false;
        }

        $curtime = strtotime($time);

        if ($curtime === false || $curtime <= 0) {
            $rewritten = preg_replace(
                '|(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})([+-])(\d{2}):(\d{2})|',
                '$1 $2 $3$4$5',
                $time,
                1
            );
            if ($rewritten !== null) {
                $time = $rewritten;
            }

            $curtime = strtotime($time);
        }

        return $curtime;
    }

    /**
     * Terminates the script, printing $msg.
     *
     * @param string $msg message to print
     */
    public function getError($msg)
    {
        die($msg);
    }

    /**
     * Appends $data to $target[$key], creating the entry on first use.
     */
    private function appendText(array &$target, $key, $data)
    {
        $target[$key] = isset($target[$key]) ? $target[$key] . $data : $data;
    }

    /**
     * Stops collecting text for the current element when it is a date element
     * and $target already holds a non-blank 'pubdate'.
     */
    private function ignoreRepeatedDate(array $fields, array $target)
    {
        if (isset($fields[$this->tag])
            && $fields[$this->tag] === 'pubdate'
            && isset($target['pubdate'])
            && trim($target['pubdate']) !== ''
        ) {
            $this->tag = "";
        }
    }
}
?>



Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Recommendations
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template