PHP Function to Get a Web Page's Title and Content - PHP Tutorial

WBOY
Release: 2016-07-13 10:39:22
Original
816 people have browsed it

Sometimes we need to fetch the title and content of a web page, which is a typical task for a content-collection (scraping) function. I will briefly share one here for the convenience of anyone who needs it. The code is as follows: function getPageContent($url) { ​ ​ ​ //$url='http://www.ttphp.com; ​ ​        $pageinfo = array();           $pageinfo[content_type] = '';                         $pageinfo[charset] =             $pageinfo[title] = '';           $pageinfo[description] = '';             $pageinfo[keywords] = '';          $pageinfo[body] = '';        $pageinfo['httpcode'] = 200; $pageinfo['all'] = ''; ​             $ch = curl_init();          curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");       curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);       curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0); ​ ​ curl_setopt($ch, CURLOPT_SSL_VERIFYPEER,0);       curl_setopt($ch, CURLOPT_TIMEOUT, 8);       curl_setopt($ch, CURLOPT_FILETIME, 1); ​ ​ curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);          //curl_setopt($ch, CURLOPT_HEADER, 1);                curl_setopt($ch, CURLOPT_URL,$url);   ​        $curl_start = microtime(true);             $store = curl_exec ($ch);   ​ $curl_time = microtime(true) - $curl_start;                                                                                                                                                                                                             if( curl_error($ch) ) {                $pageinfo['httpcode'] = 505; //gate way error                          echo 'Curl error: ' . curl_error($ch) . 
"/n"; Return $ pageinfo;                               ​ ​ ​ //print_r(curl_getinfo($ch)); ​ $pageinfo['httpcode'] = curl_getinfo($ch,CURLINFO_HTTP_CODE); ​ ​ //echo curl_getinfo($ch,CURLINFO_CONTENT_TYPE)."/n"; ​ $pageinfo[content_type] = curl_getinfo($ch,CURLINFO_CONTENT_TYPE); If(intval($pageinfo['httpcode']) <> 200 or !preg_match('@text/html@',curl_getinfo($ch,CURLINFO_CONTENT_TYPE) ) ) {                            //print_r(curl_getinfo($ch) );                                                     //exit;                                     return $pageinfo;                               Preg_match('/charset=([^/s/n/r]+)/i',curl_getinfo($ch,CURLINFO_CONTENT_TYPE),$matches); //Get charset from header             if( trim($matches[1]) ) {                   $pageinfo[charset] = trim($matches[1]); } }           //echo $pageinfo[charset];            //exit;            curl_close ($ch);            //echo $store;                 //remove javascript            $store = preg_replace("/            $store = preg_replace("//smUi",'',$store);            //remove