Sina News Thief
1.
<title>Sina_News Crawler</title>
<?php
/*
 * Sina news crawler.
 *
 * Fetches the news index page, splits the markup between the start and end
 * keywords into tokens (text runs and link tags), and stores each group of
 * four tokens as a TYPE/URL/TITLE/DATE row, skipping titles that are already
 * present in the table.
 *
 * The removed mysql_* extension is replaced by mysqli with prepared
 * statements, because the inserted values come straight from a remote page.
 */

$host = "127.0.0.1"; // MySQL host name
$namesql = "????";   // MySQL user name
$passsql = "????";   // MySQL password
$lib = "news";       // Database name
$table = "news";     // Database table name
// No leading whitespace here: with it, fopen() does not recognise the
// http:// scheme and treats the value as a local path.
$filename = "http://news.sina.com.cn/news1000/index.shtml"; // Fetched news page
$keytop = "News start"; // News start keyword
// NOTE(review): the leading space is kept from the original listing; confirm
// both keywords against the markers actually present in the fetched page.
$keybottom = " End of news"; // End of news keyword

$conn = mysqli_connect($host, $namesql, $passsql, $lib);
if (!$conn) {
    exit("Database connection failed: " . mysqli_connect_error());
}

$fp = fopen($filename, "r");
if ($fp === false) {
    mysqli_close($conn);
    exit("Unable to open " . $filename);
}

// --- Tokenise the page -----------------------------------------------------
// A token is either a complete tag ("<...>") or the text up to the next
// '<' or '>'. Tokens are collected only while between the two keywords.
$text = [];
$add = "";
$found = false;
$end = false;

while (!$end) {
    $word = false;
    $complete = false;

    while (!$complete) {
        $word = fgetc($fp);
        if ($word === false) {
            // End of stream: stop instead of spinning forever when the end
            // keyword never shows up.
            $end = true;
            break;
        }

        $add .= $word;

        if ($word === '<' && strlen($add) !== 1) {
            // A '<' in the middle of a token starts the next tag: close the
            // current token without it.
            $add = substr($add, 0, -1);
            $complete = true;
        }
        if ($word === '>') {
            $complete = true;
        }
    }

    if ($add !== "") {
        if (strpos($add, $keytop) !== false) {
            $found = true;
        }
        if (strpos($add, $keybottom) !== false) {
            $found = false;
            $end = true;
        }

        // Keep plain text and tags that carry an href; drop every other tag.
        $isTag = strpos($add, '<') !== false;
        if ($found && (!$isTag || strpos($add, 'href') !== false)) {
            $text[] = $add;
        }
    }

    // The '<' that closed a text run belongs to the next token.
    $add = ($word === '<') ? '<' : "";
}
fclose($fp);

// --- Store the records -----------------------------------------------------
// The table name comes from the configuration above, not from the page, so
// it is interpolated; all scraped values are bound as parameters.
$select = mysqli_prepare($conn, "select * from `$table` where TITLE=?");
$insert = mysqli_prepare($conn, "insert into `$table` (TYPE,URL,TITLE,DATE) values (?,?,?,?)");
if ($select === false || $insert === false) {
    $message = "Unable to prepare statements: " . mysqli_error($conn);
    mysqli_close($conn);
    exit($message);
}

$type = "";
$url = "";
$title = "";
$date = "";
mysqli_stmt_bind_param($select, "s", $title);
mysqli_stmt_bind_param($insert, "ssss", $type, $url, $title, $date);

// NOTE(review): the original loop header was lost when this listing was
// extracted (only "for($i=1;$i" survives). The body reads offsets +1..+4 from
// $i, so a stride of 5 tokens per record is assumed here - confirm against
// the real page layout before relying on it.
$recordStride = 5;
$total = count($text);

for ($i = 1; $i + 4 < $total; $i += $recordStride) {
    $type = $text[$i + 1];
    $url = $text[$i + 2];
    $title = $text[$i + 3];
    $date = $text[$i + 4];

    if (!mysqli_stmt_execute($select)) {
        // Cannot tell whether the title exists; skip rather than risk a duplicate.
        continue;
    }
    mysqli_stmt_store_result($select);
    $exists = mysqli_stmt_num_rows($select) > 0;
    mysqli_stmt_free_result($select);

    if (!$exists) {
        mysqli_stmt_execute($insert);
    }
}

mysqli_stmt_close($select);
mysqli_stmt_close($insert);
mysqli_close($conn);
?>
2.
<?php
/*
 * Streams the Sina news index and echoes the section that follows the
 * "News start" marker, stopping after the line that contains "News End".
 *
 * The fetched markup is printed as-is (unescaped), matching the original
 * script's output.
 */
$handle = fopen("http://news.sina.com.cn/news1000/", "r");
if ($handle === false) {
    exit("Unable to open the news page");
}

$sign = 0; // 1 while inside the news section

// Looping on the fgets() result avoids processing a false read at end of stream.
while (($message = fgets($handle, 512)) !== false) {
    // Printing happens before the marker checks, so the start-marker line is
    // not printed while the end-marker line still is.
    if ($sign == 1) {
        print("$message");
    }

    // ereg() was removed in PHP 7; both markers are literal strings, so a
    // plain substring search is equivalent.
    if (strpos($message, "News start") !== false) {
        $sign = 1;
    } elseif (strpos($message, "News End") !== false) {
        $sign = 0;
    }
}
fclose($handle);
?>
http://www.bkjia.com/PHPjc/314626.htmlwww.bkjia.comtruehttp: //www.bkjia.com/PHPjc/314626.htmlTechArticleSina News Thief 1. html head meta http-equiv="Refresh" content="60" META HTTP-EQUIV ="Pragma" CONTENT="no-cache" meta http-equiv="Content-Type" content="text/html"; charset="?????...