Home > php教程 > php手册 > php获取网页标题和内容函数

php获取网页标题和内容函数

WBOY
Release: 2016-06-13 09:43:53
Original
1302 people have browsed it

有时候我们需要获取网页的标题与内容,就是个采集函数,这里简单分享下,方便需要的朋友  代码如下: function getPageContent($url) {              //$url='http://www.ttphp.com;              $pageinfo = array();            $pageinfo[content_type] = '';            $pageinfo[charset] = '';            $pageinfo[title] = '';            $pageinfo[description] = '';            $pageinfo[keywords] = '';            $pageinfo[body] = '';            $pageinfo['httpcode'] = 200;            $pageinfo['all'] = '';               $ch = curl_init();            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");            curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER,0);            curl_setopt($ch, CURLOPT_TIMEOUT, 8);            curl_setopt($ch, CURLOPT_FILETIME, 1);            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);            //curl_setopt($ch, CURLOPT_HEADER, 1);                  curl_setopt($ch, CURLOPT_URL,$url);              $curl_start = microtime(true);            $store = curl_exec ($ch);              $curl_time = microtime(true) - $curl_start;            if( curl_error($ch) ) {                $pageinfo['httpcode'] = 505;  //gate way error                echo 'Curl error: ' . 
curl_error($ch) ."/n";                return $pageinfo;            }              //print_r(curl_getinfo($ch));            $pageinfo['httpcode'] = curl_getinfo($ch,CURLINFO_HTTP_CODE);            //echo curl_getinfo($ch,CURLINFO_CONTENT_TYPE)."/n";            $pageinfo[content_type] = curl_getinfo($ch,CURLINFO_CONTENT_TYPE);            if(intval($pageinfo['httpcode']) 200 or !preg_match('@text/html@',curl_getinfo($ch,CURLINFO_CONTENT_TYPE) )   ) {                    //print_r(curl_getinfo($ch) );                    //exit;                    return $pageinfo;            }            preg_match('/charset=([^/s/n/r]+)/i',curl_getinfo($ch,CURLINFO_CONTENT_TYPE),$matches); //从header 里取charset            if( trim($matches[1]) ) {                $pageinfo[charset] = trim($matches[1]);            }            //echo $pageinfo[charset];            //exit;            curl_close ($ch);            //echo $store;                 //remove javascript            $store = preg_replace("/            $store = preg_replace("//smUi",'',$store);            //remove

source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Latest Issues
php data acquisition?
From 1970-01-01 08:00:00
0
0
0
PHP extension intl
From 1970-01-01 08:00:00
0
0
0
How to learn php well
From 1970-01-01 08:00:00
0
0
0
Popular Recommendations
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template