This program is an important part of a "thief program" (a content scraper). The section shown here is only the part that downloads remote images. The program is relatively simple, and most of it is commented.
The code is as follows:
// Localise the remote images referenced in $content: each image is downloaded
// into upload/<Ymd>/ and its URL inside $content is rewritten to the local copy.
// "~" is the pattern delimiter so the slashes of "http://" need no escaping;
// the look-ahead makes the match end on a real file extension.
if (preg_match_all('~https?://[^\s"\'<>]+?\.(?:jpe?g|gif|png)(?=$|[\s"\'<>?#])~ui', stripcslashes($content), $aliurl)) {
    $i = 0; // Counter appended to the file name so files saved in the same second do not clash
    // A URL may occur several times; str_replace() rewrites every occurrence
    // at once, so each distinct URL only has to be downloaded once.
    foreach (array_unique($aliurl[0]) as $v) {
        //echo $v."\n";
        // Leave images that already live on our own site untouched. This is
        // checked before downloading so no request is wasted on them.
        if (stripos($v, "jb51.net") !== false) {
            continue;
        }
        $filetype = pathinfo($v, PATHINFO_EXTENSION); // File extension of the remote image
        $ff = @file_get_contents($v); // Raw image bytes, or false when the download fails
        if ($ff === false || $ff === '') {
            // The image could not be fetched: point the content at the default image
            $content = str_replace($v, "/upload/201204/20120417213810742.gif", $content);
            continue;
        }
        $dir = "upload/" . date("Ymd") . "/"; // One storage directory per day
        // Create the directory tree when missing; the trailing is_dir() covers
        // the case where another request created it in the meantime.
        if (!is_dir($dir) && !@mkdir($dir, 0777, true) && !is_dir($dir)) {
            continue; // Cannot store the file: keep the remote URL in place
        }
        // "H" (24-hour clock) keeps morning and afternoon file names distinct
        $nfn = $dir . date("YmdHis") . $i . "." . $filetype;
        if (@file_put_contents($nfn, $ff) === false) {
            continue; // Write failed: keep the remote URL rather than link to a missing file
        }
        $i++;
        echo "\n";
        $content = str_replace($v, $nfn, $content); // Rewrite the remote URL to the local copy
    }
}
PHP implementation code for downloading images to a local directory using regular expressions
The code is as follows:
/*
author: ssh_kobe
date: 20110602
limitation: images whose path in the web page is not an absolute URL cannot be crawled
*/
set_time_limit(0); // Crawling may take a long time, so lift the execution time limit
$URL = 'http://pp.baidu.com/'; // Page to crawl; any URL can be used here
get_pic($URL);
/**
 * Fetch a web page and download every image referenced by its <img> tags.
 *
 * The src values are handed to get_name() unchanged, so only absolute image
 * URLs can actually be downloaded.
 *
 * @param string $pic_url URL of the page to scan.
 * @return int Always 0.
 */
function get_pic($pic_url) {
    // Fetch the page markup (CurlGet() yields false when the request fails)
    $data = CurlGet($pic_url);
    $arr_src = array();
    if (is_string($data) && $data !== '') {
        // Group 1 is the opening quote, group 2 the src value. The value must
        // end in a known image extension right before the matching closing quote.
        $pattern_src = '/<img\b[^>]*?\bsrc\s*=\s*(["\'])([^"\'<>]*?\.(?:gif|jpe?g|png|bmp))\1/i';
        if (preg_match_all($pattern_src, $data, $match_src)) {
            $arr_src = $match_src[2]; // List of image URLs found on the page
        }
    }
    get_name($arr_src);
    echo "
finished!!!";
    return 0;
}
/**
 * Download each image URL and save it in the current working directory
 * under a timestamp-based name that keeps the image's file extension.
 *
 * Prints "[OK] ..!" for every saved image and "[FAIL] ..!" for every URL
 * that has no recognised extension, cannot be fetched or cannot be written.
 *
 * @param array $pic_arr List of image URLs.
 * @return int Always 0.
 */
function get_name($pic_arr)
{
    // Extension at the end of the URL path (optionally followed by a query
    // string or fragment), so ".gif" inside a host name is not picked up.
    $pattern_type = '/\.(?:jpe?g|bmp|gif|png)(?=$|[?#])/i';
    foreach ($pic_arr as $pic_item) { // Handle one image URL at a time
        if (!preg_match($pattern_type, $pic_item, $match_type)) {
            echo "[FAIL] ..!";
            continue;
        }
        $pic_data = CurlGet($pic_item);
        if (!is_string($pic_data) || $pic_data === '') {
            echo "[FAIL] ..!"; // Nothing downloaded: do not leave an empty file behind
            continue;
        }
        $pic_name = get_unique() . $match_type[0]; // Microsecond timestamp + ".ext"
        // Write the raw bytes in one call and check the result instead of
        // passing a possibly failed handle on to fwrite()/fclose().
        if (@file_put_contents($pic_name, $pic_data) === false) {
            echo "[FAIL] ..!";
            continue;
        }
        echo "[OK] ..!";
    }
    return 0;
}
/**
 * Build an identifier from the current time with microsecond resolution.
 *
 * The result is the Unix timestamp in seconds followed by the microsecond
 * part zero-padded to six digits, so identifiers have a fixed width and
 * sort chronologically.
 *
 * @return string Digits only: seconds followed by six microsecond digits.
 */
function get_unique(){
    // microtime() returns "<fraction> <seconds>", e.g. "0.01234500 1700000000"
    list($msec, $sec) = explode(" ", microtime());
    // round() avoids float truncation (e.g. 56.999999 becoming 56); min() keeps
    // the value within six digits should rounding ever reach 1000000.
    $usec = min(999999, (int) round((float) $msec * 1000000));
    return sprintf('%s%06d', $sec, $usec);
}
/**
 * Fetch the content behind a URL with cURL.
 *
 * Redirects are not followed, and cookies are read from and stored in
 * "cookie.txt" in the current working directory.
 *
 * @param string $url URL to fetch; "&amp;" entities copied from HTML are decoded.
 * @return string|false Response body, or false when the request fails.
 */
function CurlGet ($url){
    // URLs scraped from HTML attributes carry "&amp;" instead of "&"
    $url = str_replace('&amp;', '&', $url);
    $curl = curl_init();
    if ($curl === false) {
        return false; // No handle available: report failure the same way curl_exec() does
    }
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, false); // Body only, no response headers
    //curl_setopt($curl, CURLOPT_REFERER,$url);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0 ; SeaPort/1.2; Windows NT 5.1; SV1; InfoPath.2)");
    curl_setopt($curl, CURLOPT_COOKIEJAR, 'cookie.txt');
    curl_setopt($curl, CURLOPT_COOKIEFILE, 'cookie.txt');
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // Return the body instead of printing it
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 0);
    $values = curl_exec($curl);
    curl_close($curl);
    return $values;
}
?>
Source: http://www.bkjia.com/PHPjc/324775.html (www.bkjia.com). Summary: This program is an important part of a "thief program" (a content scraper). The section shown here is only the part that downloads remote images. The program is relatively simple, and most of it is commented.