84669 orang belajar
152542 orang belajar
20005 orang belajar
5487 orang belajar
7821 orang belajar
359900 orang belajar
3350 orang belajar
180660 orang belajar
48569 orang belajar
18603 orang belajar
40936 orang belajar
1549 orang belajar
1183 orang belajar
32909 orang belajar
尝试抓取的页面,结果:
换其他链接不会出现这样的问题,请问如何解决这个问题??
有没有能够正常抓取到 http://www.chwfsc.com/ 这个网站内容的范例??
闭关修行中......
mb_http_output('utf-8');$url="http://www.chwfsc.com/";$ch=curl_init();curl_setopt($ch,CURLOPT_URL,$url);curl_setopt($ch,CURLOPT_HEADER,0);curl_setopt($ch,CURLOPT_NOBODY, true);curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);curl_setopt($ch,CURLOPT_ENCODING,'utf8');curl_setopt($ch,CURLOPT_POST,1);curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11');$html=curl_exec($ch);curl_close($ch);var_dump($html);可以了,亲测可用
封装个curl试试
function task() { $url = "url"; $headers = randIp(); $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $url); curl_setopt($curl, CURLOPT_BINARYTRANSFER, true); curl_setopt($curl, CURLOPT_HTTPHEADER, $headers); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0"); //模拟浏览器类型 curl_setopt($curl, CURLOPT_TIMEOUT, 300); // 设置超时限制防止死循环 curl_setopt($curl, CURLOPT_HEADER, 0); // 显示返回的Header区域内容 curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // 获取的信息以文件流的形式返回 $tmpInfo = curl_exec($curl); if (curl_errno($curl)) { print "Error: " . curl_error($curl); } else { curl_close($curl); } } //此函数提供了国内的IP地址 function randIP(){ $ip_long = array( array('607649792', '608174079'), //36.56.0.0-36.63.255.255 array('1038614528', '1039007743'), //61.232.0.0-61.237.255.255 array('1783627776', '1784676351'), //106.80.0.0-106.95.255.255 array('2035023872', '2035154943'), //121.76.0.0-121.77.255.255 array('2078801920', '2079064063'), //123.232.0.0-123.235.255.255 array('-1950089216', '-1948778497'), //139.196.0.0-139.215.255.255 array('-1425539072', '-1425014785'), //171.8.0.0-171.15.255.255 array('-1236271104', '-1235419137'), //182.80.0.0-182.92.255.255 array('-770113536', '-768606209'), //210.25.0.0-210.47.255.255 array('-569376768', '-564133889'), //222.16.0.0-222.95.255.255 ); $rand_key = mt_rand(0, 9); $ip= long2ip(mt_rand($ip_long[$rand_key][0], $ip_long[$rand_key][1])); $headers['CLIENT-IP'] = $ip; $headers['X-FORWARDED-FOR'] = $ip; $headerArr = array(); foreach( $headers as $n => $v ) { $headerArr[] = $n .':' . $v; } return $headerArr; }
mb_http_output('utf-8');
$url="http://www.chwfsc.com/";
$ch=curl_init();
curl_setopt($ch,CURLOPT_URL,$url);
curl_setopt($ch,CURLOPT_HEADER,0);
curl_setopt($ch,CURLOPT_NOBODY, true);
curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch,CURLOPT_ENCODING,'utf8');
curl_setopt($ch,CURLOPT_POST,1);
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11');
$html=curl_exec($ch);
curl_close($ch);
var_dump($html);
可以了,亲测可用
封装个curl试试