Sometimes we need to fetch the title and content of a web page, which is a typical job for a scraping (content-collection) function. I am sharing one here briefly for the convenience of anyone who needs it.
The code is as follows:
function getPageContent($url) {
//$url='http://www.ttphp.com;
$pageinfo = array();
$pageinfo[content_type] = '';
$pageinfo[charset] =
$pageinfo[title] = '';
$pageinfo[description] = '';
$pageinfo[keywords] = '';
$pageinfo[body] = '';
$pageinfo['httpcode'] = 200;
$pageinfo['all'] = '';
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER,0);
curl_setopt($ch, CURLOPT_TIMEOUT, 8);
curl_setopt($ch, CURLOPT_FILETIME, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
//curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_URL,$url);
$curl_start = microtime(true);
$store = curl_exec ($ch);
$curl_time = microtime(true) - $curl_start;
if( curl_error($ch) ) {
$pageinfo['httpcode'] = 505; //gate way error
echo 'Curl error: ' . curl_error($ch) . "/n";
Return $ pageinfo;
//print_r(curl_getinfo($ch));
$pageinfo['httpcode'] = curl_getinfo($ch,CURLINFO_HTTP_CODE);
//echo curl_getinfo($ch,CURLINFO_CONTENT_TYPE)."/n";
$pageinfo[content_type] = curl_getinfo($ch,CURLINFO_CONTENT_TYPE);
If(intval($pageinfo['httpcode']) <> 200 or !preg_match('@text/html@',curl_getinfo($ch,CURLINFO_CONTENT_TYPE) ) ) {
//print_r(curl_getinfo($ch) );
//exit;
return $pageinfo;
Preg_match('/charset=([^/s/n/r]+)/i',curl_getinfo($ch,CURLINFO_CONTENT_TYPE),$matches); //Get charset from header
if( trim($matches[1]) ) {
$pageinfo[charset] = trim($matches[1]);
} }
//echo $pageinfo[charset];
//exit;
curl_close ($ch);
//echo $store;
//remove javascript
$store = preg_replace("/