PHP code for downloading images to a local directory using regular expressions_PHP tutorial

WBOY
Release: 2016-07-21 15:24:48
Original
920 people have browsed it

Copy code The code is as follows:

<?php
/*
author: ssh_kobe
date: 20110602
limitation: images whose path in the web page is not an absolute URL cannot be crawled
*/
// Fetching a page and then every image on it can take arbitrarily long,
// so lift PHP's execution time limit for this script.
set_time_limit(0);

// Page to scrape; any URL can be used here. The literal must not contain
// leading or trailing whitespace, otherwise the request targets a malformed URL.
$URL = 'http://pp.baidu.com/';

get_pic($URL);

/**
 * Fetches the page at $pic_url, collects the src attribute of every <img>
 * tag that points at an image file, and passes the list to get_name() so
 * each image is downloaded and saved.
 *
 * The src values are used exactly as written in the HTML; relative paths
 * are not resolved against the page URL.
 *
 * @param string $pic_url URL of the web page to scan for images
 * @return int always 0
 */
function get_pic($pic_url) {
    // Page markup as returned by CurlGet(); anything that is not a non-empty
    // string (e.g. false) is treated as "no page".
    $html = CurlGet($pic_url);

    // Capture group 1 is the quoted src value of an <img> tag, required to end
    // in a known image extension. Case-insensitive so <IMG SRC="A.JPG"> matches.
    $pattern_src = '/<img\b[^>]*?\bsrc\s*=\s*["\']([^"\'>\s]+?\.(?:gif|jpe?g|png|bmp))["\'][^>]*>/i';

    $arr_src = array();
    if (is_string($html) && $html !== '') {
        // preg_match_all() returns false on a PCRE error and 0 when nothing
        // matched; in both cases the list stays empty.
        if (preg_match_all($pattern_src, $html, $match_src) > 0) {
            $arr_src = $match_src[1];
        }
    }

    get_name($arr_src);

    echo "\nfinished!!!";
    return 0;
}

/**
 * Downloads every image URL in $pic_arr and writes it to a file named
 * "<get_unique() id><extension>" (relative path, so it lands in the current
 * working directory).
 *
 * Prints "[OK]..!" for each image saved and "[FAIL]..!" for each one that
 * could not be identified, downloaded or written.
 *
 * @param array $pic_arr list of image URLs
 * @return int always 0
 */
function get_name($pic_arr)
{
    // Group 1 is the file extension including its leading dot.
    $pattern_type = '/(\.(jpg|bmp|jpeg|gif|png))/i';

    foreach ($pic_arr as $pic_item) {
        // Without a recognizable extension there is no file name to build.
        if (preg_match($pattern_type, $pic_item, $match_type) !== 1) {
            echo "[FAIL]..!";
            continue;
        }

        // Download first so that a failed request does not leave an empty file behind.
        $image = CurlGet($pic_item);
        if (!is_string($image) || $image === '') {
            echo "[FAIL]..!";
            continue;
        }

        // Timestamp-based name keeps files from different URLs apart.
        $pic_name = get_unique() . $match_type[1];

        // Binary-safe write of the raw image bytes.
        if (file_put_contents($pic_name, $image) === false) {
            echo "[FAIL]..!";
            continue;
        }

        echo "[OK]..!";
    }

    return 0;
}

/**
 * Builds an identifier from the current time: the Unix timestamp in seconds
 * followed by the microsecond part, zero-padded to exactly six digits.
 *
 * Two calls within the same microsecond yield the same value.
 *
 * @return string digits only, e.g. "1469085888012345"
 */
function get_unique(){
    // microtime() returns "0.uuuuuu00 ssssssssss": fraction first, then seconds.
    list($fraction, $sec) = explode(' ', microtime());

    // round() rather than truncation: 0.29 * 1000000 may come out as
    // 289999.99999999997 in floating point. The %06d padding keeps leading
    // zeros so the result always has a fixed-width microsecond field.
    $usec = (int) round((float) $fraction * 1000000);

    return $sec . sprintf('%06d', $usec);
}

/**
 * Fetches the content of a URL with cURL and returns the response body.
 *
 * Response headers are excluded, cookies are read from and written to
 * 'cookie.txt', and HTTP redirects are NOT followed.
 *
 * @param string $url URL to request; may still contain HTML-encoded "&amp;"
 * @return string|false response body, or false when the handle could not be
 *                      created or the transfer failed
 */
function CurlGet($url){
    // URLs lifted out of HTML attributes carry "&" as the entity "&amp;";
    // turn it back into a plain ampersand and drop surrounding whitespace.
    $url = str_replace('&amp;', '&', trim($url));

    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, false);

    //curl_setopt($curl, CURLOPT_REFERER,$url);
    curl_setopt($curl, CURLOPT_USERAGENT, " Mozilla/4.0 (compatible; MSIE 6.0; SeaPort/1.2; Windows NT 5.1; SV1; InfoPath.2)");
    curl_setopt($curl, CURLOPT_COOKIEJAR, 'cookie.txt');
    curl_setopt($curl, CURLOPT_COOKIEFILE, 'cookie.txt');
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 0);

    $values = curl_exec($curl);
    curl_close($curl);

    return $values;
}
?>

www.bkjia.com true http://www.bkjia.com/PHPjc/324220.html TechArticle Copy the code as follows: ?php /* author: ssh_kobe date: 20110602 limitation: if the image path in the web page is not an absolute path, it cannot be crawled */ set_time_limit(0);//The crawl is not limited by time...
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template