//Initialize a cURL object
代码如下 |
复制代码 |
// Fetch a URL with cURL, report any transfer error, and print timing info.

// Initialize a cURL handle.
$curl = curl_init();

// Set the URL to fetch.
curl_setopt($curl, CURLOPT_URL, 'http://www.bKjia.c0m');

// false: do not include the response headers in the returned value;
// true: include them.
curl_setopt($curl, CURLOPT_HEADER, false);

// Optional: send browser-like request headers if the target site requires them.
// Each entry is a plain "Name: value" line (no surrounding braces or quotes).
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
    "Accept-Language: en-us,en;q=0.5"
));

// Optional alternative: set only the user agent. Normally just one of the two
// User-Agent mechanisms (the header list above or this option) is needed.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");

// true: return the response as a string from curl_exec() instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

// Set this to true only when the page content is not needed: the body is then
// not downloaded and $data will not contain the page. The default is false,
// which is what fetching the page into $data requires.
curl_setopt($curl, CURLOPT_NOBODY, false);

// Run the request and keep the response in $data.
$data = curl_exec($curl);

// Check for errors. Use "=== false" rather than "== false" so that an empty
// response ('') is not mistaken for the boolean false returned on failure.
if ($data === false) {
    echo "cURL Error: " . curl_error($curl);
}

// Transfer information must be read while the handle is still open.
$info = curl_getinfo($curl);
echo '获取'. $info['url'] . '耗时'. $info['total_time'] . '秒';

// Close the handle only after curl_error() / curl_getinfo() have been called.
curl_close($curl);

/* ...The array returned by curl_getinfo() includes the following keys:
"url"                     // last effective URL
"content_type"            // Content-Type of the response
"http_code"               // HTTP status code
"header_size"             // size of the received headers
"request_size"            // size of the issued request(s)
"filetime"                // remote time of the retrieved document
"ssl_verify_result"       // SSL certificate verification result
"redirect_count"          // number of redirects followed
"total_time"              // total transfer time in seconds
"namelookup_time"         // DNS lookup time
"connect_time"            // time taken to establish the connection
"pretransfer_time"        // time until just before the transfer begins
"size_upload"             // size of uploaded data
"size_download"           // size of downloaded data
"speed_download"          // download speed
"speed_upload"            // upload speed
"download_content_length" // length of the download content
"upload_content_length"   // length of the upload content
"starttransfer_time"      // time until the transfer starts
"redirect_time"           // time spent on redirects
*/
// Fetch a URL with cURL, report any transfer error, and print timing info.

// Initialize a cURL handle.
$curl = curl_init();

// Set the URL to fetch.
curl_setopt($curl, CURLOPT_URL, 'http://www.bKjia.c0m');

// false: do not include the response headers in the returned value;
// true: include them.
curl_setopt($curl, CURLOPT_HEADER, false);

// Optional: send browser-like request headers if the target site requires them.
// Each entry is a plain "Name: value" line (no surrounding braces or quotes).
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
    "Accept-Language: en-us,en;q=0.5"
));

// Optional alternative: set only the user agent. Normally just one of the two
// User-Agent mechanisms (the header list above or this option) is needed.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");

// true: return the response as a string from curl_exec() instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

// Set this to true only when the page content is not needed: the body is then
// not downloaded and $data will not contain the page. The default is false,
// which is what fetching the page into $data requires.
curl_setopt($curl, CURLOPT_NOBODY, false);

// Run the request and keep the response in $data.
$data = curl_exec($curl);

// Check for errors. Use "=== false" rather than "== false" so that an empty
// response ('') is not mistaken for the boolean false returned on failure.
if ($data === false) {
    echo "cURL Error: " . curl_error($curl);
}

// Transfer information must be read while the handle is still open.
$info = curl_getinfo($curl);
echo 'Get'. $info['url'] . 'Time consuming'. $info['total_time'] . 'Seconds';

// Close the handle only after curl_error() / curl_getinfo() have been called.
curl_close($curl);

/* ...The array returned by curl_getinfo() includes the following keys:
"url"                     // last effective URL
"content_type"            // Content-Type of the response
"http_code"               // HTTP status code
"header_size"             // size of the received headers
"request_size"            // size of the issued request(s)
"filetime"                // remote time of the retrieved document
"ssl_verify_result"       // SSL certificate verification result
"redirect_count"          // number of redirects followed
"total_time"              // total transfer time in seconds
"namelookup_time"         // DNS lookup time
"connect_time"            // time taken to establish the connection
"pretransfer_time"        // time until just before the transfer begins
"size_upload"             // size of uploaded data
"size_download"           // size of downloaded data
"speed_download"          // download speed
"speed_upload"            // upload speed
"download_content_length" // length of the download content
"upload_content_length"   // length of the upload content
"starttransfer_time"      // time until the transfer starts
"redirect_time"           // time spent on redirects
*/