我想实现通过淘宝号查询淘宝买家信誉的功能,就像131458.com的一样。
我的思路是通过curl采集这个网站的数据。但是却被防了,返回给我的是无用的信息。
代码如下:
1 | $parm = $data [ 'parm' ]. "&_=" .time(). "326" ;
|
Nach dem Login kopieren
请问还要添加什么,才能模拟浏览器访问,采集到正常数据。
回复讨论(解决方案)
抓包看看呢?一般都是header里面需要啥,就都传过去
首先,你的方法不对。这是在 Chrome 按 F12 跟踪到的结果,对应的 curl 命令如下:
curl "http://www.131458.com/handler/TaobaoInfo.ashx?tbNickInfoJson=hades&token=736905fff9ee639a4b5f46b53526434b_0b32d3aec47ad29fef2d7ddb67831933&_=1447405734036" -H "Cookie: ASP.NET_SessionId=4ki3s2yhj2555bzxlbvdqhju; Hm_lvt_ccc93bebd5e7bdc84975186073891702=1447405635; Hm_lpvt_ccc93bebd5e7bdc84975186073891702=1447405635; bdshare_firstime=1447405634805" -H "Accept-Encoding: gzip, deflate, sdch" -H "Accept-Language: zh-CN,zh;q=0.8" -H "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36" -H "Accept: application/json, text/javascript, */*; q=0.01" -H "Referer: http://www.131458.com/" -H "X-Requested-With: XMLHttpRequest" -H "Connection: keep-alive" --compressed
一次不好完成的curl.
截图:
<?php
/**
 * Looks up a Taobao buyer's reputation through 131458.com in two requests:
 *   1. POST the escaped nickname to load.aspx/Load to obtain a seed value `d`
 *      (session cookies from this request are stored in $cookie_file);
 *   2. GET TaobaoInfo.ashx with token = md5(d . suffix) . '_' . d, sending the
 *      stored cookies back.
 * The raw response of step 2 is printed HTML-escaped.
 */

$tb_nick_name = '1心一易';
$cookie_file  = './cookies.txt';

// Suffix appended to the seed before hashing. The site's script has been
// reported to change this value over time, so keep it in one place and
// re-check it against the site's current JavaScript when tokens stop matching.
$token_suffix = 'vvl81';

// Step 1: request the seed.
$post     = array('nick' => escape($tb_nick_name));
$response = _curl_fetch(
    'http://www.131458.com/handler/load.aspx/Load',
    $cookie_file,
    json_encode($post)
);

$d = null;
if ($response !== false) {
    $decoded = json_decode($response);
    if (is_object($decoded) && isset($decoded->d)) {
        $d = $decoded->d;
    }
}

if ($d === null) {
    // No exit() here: report the problem and simply skip step 2.
    echo 'Error: could not obtain the token seed from load.aspx/Load';
} else {
    // Step 2: build the token and query the reputation data.
    $_token = md5($d . $token_suffix);
    $url    = 'http://www.131458.com/handler/TaobaoInfo.ashx?nickCode=' . escape($tb_nick_name)
            . '&token=' . $_token . '_' . $d
            . '&_=' . time() . rand(100, 999); // seconds + 3 random digits, shaped like a ms timestamp

    $contents = _curl_fetch($url, $cookie_file);
    if ($contents === false) {
        echo 'Error: request to TaobaoInfo.ashx failed';
    } else {
        echo htmlspecialchars($contents);
    }
}

/**
 * Performs one HTTP request with the headers used by this script.
 *
 * @param string      $url         Target URL.
 * @param string      $cookie_file Cookie file path; written to (cookie jar) on
 *                                 POST, read from on GET.
 * @param string|null $post_body   Request body; when not null the request is a
 *                                 POST that follows redirects, otherwise a GET.
 * @return string|false Response body, or false when the transfer failed.
 */
function _curl_fetch($url, $cookie_file, $post_body = null)
{
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    $header = array(
        'Content-Type: application/json; charset=UTF-8',
        'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36',
    );
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Bound the wait so an unreachable host cannot hang the script.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    if ($post_body !== null) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post_body);
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    } else {
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
    }

    $body = curl_exec($curl);
    curl_close($curl);
    // curl_close() has no effect on PHP 8+; releasing the handle is what lets
    // libcurl write the cookie jar before the next request reads it.
    unset($curl);

    return $body;
}

/**
 * PHP counterpart of JavaScript's escape().
 *
 * The input is treated as UTF-8 and converted to UTF-16 code units:
 *   - letters, digits and - _ . + @ / * are copied unchanged;
 *   - other code units up to 0xFF become %XX (two upper-case hex digits);
 *   - code units above 0xFF become %uXXXX (characters outside the BMP are
 *     emitted as two %uXXXX surrogate escapes).
 * If the input is not valid UTF-8, each byte is encoded as its own unit.
 *
 * @param string $string Text to encode.
 * @return string Escaped text.
 */
function escape($string)
{
    $string = (string) $string;
    if ($string === '') {
        return '';
    }

    $special = "-_.+@/*0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // An explicit big-endian target keeps the result independent of the host
    // byte order, so no PHP_OS special-casing is needed.
    $utf16 = @iconv('UTF-8', 'UTF-16BE', $string);
    $units = ($utf16 === false)
        ? unpack('C*', $string)   // invalid UTF-8: fall back to raw bytes
        : unpack('n*', $utf16);   // one 16-bit code unit per element

    $output = '';
    foreach ($units as $unit) {
        if ($unit > 0xFF) {
            $output .= sprintf('%%u%04X', $unit);
        } elseif (strpos($special, chr($unit)) !== false) {
            $output .= chr($unit);
        } else {
            $output .= sprintf('%%%02X', $unit);
        }
    }

    return $output;
}
|
Nach dem Login kopieren
Nach dem Login kopieren
Nach dem Login kopieren
一次不好完成的curl.
截图:
<?php
/**
 * Looks up a Taobao buyer's reputation through 131458.com in two requests:
 *   1. POST the escaped nickname to load.aspx/Load to obtain a seed value `d`
 *      (session cookies from this request are stored in $cookie_file);
 *   2. GET TaobaoInfo.ashx with token = md5(d . suffix) . '_' . d, sending the
 *      stored cookies back.
 * The raw response of step 2 is printed HTML-escaped.
 */

$tb_nick_name = '1心一易';
$cookie_file  = './cookies.txt';

// Suffix appended to the seed before hashing. The site's script has been
// reported to change this value over time, so keep it in one place and
// re-check it against the site's current JavaScript when tokens stop matching.
$token_suffix = 'vvl81';

// Step 1: request the seed.
$post     = array('nick' => escape($tb_nick_name));
$response = _curl_fetch(
    'http://www.131458.com/handler/load.aspx/Load',
    $cookie_file,
    json_encode($post)
);

$d = null;
if ($response !== false) {
    $decoded = json_decode($response);
    if (is_object($decoded) && isset($decoded->d)) {
        $d = $decoded->d;
    }
}

if ($d === null) {
    // No exit() here: report the problem and simply skip step 2.
    echo 'Error: could not obtain the token seed from load.aspx/Load';
} else {
    // Step 2: build the token and query the reputation data.
    $_token = md5($d . $token_suffix);
    $url    = 'http://www.131458.com/handler/TaobaoInfo.ashx?nickCode=' . escape($tb_nick_name)
            . '&token=' . $_token . '_' . $d
            . '&_=' . time() . rand(100, 999); // seconds + 3 random digits, shaped like a ms timestamp

    $contents = _curl_fetch($url, $cookie_file);
    if ($contents === false) {
        echo 'Error: request to TaobaoInfo.ashx failed';
    } else {
        echo htmlspecialchars($contents);
    }
}

/**
 * Performs one HTTP request with the headers used by this script.
 *
 * @param string      $url         Target URL.
 * @param string      $cookie_file Cookie file path; written to (cookie jar) on
 *                                 POST, read from on GET.
 * @param string|null $post_body   Request body; when not null the request is a
 *                                 POST that follows redirects, otherwise a GET.
 * @return string|false Response body, or false when the transfer failed.
 */
function _curl_fetch($url, $cookie_file, $post_body = null)
{
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    $header = array(
        'Content-Type: application/json; charset=UTF-8',
        'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36',
    );
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Bound the wait so an unreachable host cannot hang the script.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    if ($post_body !== null) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post_body);
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    } else {
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
    }

    $body = curl_exec($curl);
    curl_close($curl);
    // curl_close() has no effect on PHP 8+; releasing the handle is what lets
    // libcurl write the cookie jar before the next request reads it.
    unset($curl);

    return $body;
}

/**
 * PHP counterpart of JavaScript's escape().
 *
 * The input is treated as UTF-8 and converted to UTF-16 code units:
 *   - letters, digits and - _ . + @ / * are copied unchanged;
 *   - other code units up to 0xFF become %XX (two upper-case hex digits);
 *   - code units above 0xFF become %uXXXX (characters outside the BMP are
 *     emitted as two %uXXXX surrogate escapes).
 * If the input is not valid UTF-8, each byte is encoded as its own unit.
 *
 * @param string $string Text to encode.
 * @return string Escaped text.
 */
function escape($string)
{
    $string = (string) $string;
    if ($string === '') {
        return '';
    }

    $special = "-_.+@/*0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // An explicit big-endian target keeps the result independent of the host
    // byte order, so no PHP_OS special-casing is needed.
    $utf16 = @iconv('UTF-8', 'UTF-16BE', $string);
    $units = ($utf16 === false)
        ? unpack('C*', $string)   // invalid UTF-8: fall back to raw bytes
        : unpack('n*', $utf16);   // one 16-bit code unit per element

    $output = '';
    foreach ($units as $unit) {
        if ($unit > 0xFF) {
            $output .= sprintf('%%u%04X', $unit);
        } elseif (strpos($special, chr($unit)) !== false) {
            $output .= chr($unit);
        } else {
            $output .= sprintf('%%%02X', $unit);
        }
    }

    return $output;
}
|
Nach dem Login kopieren
Nach dem Login kopieren
Nach dem Login kopieren
非常感谢你。
我现在有一个问题,就是获取了d之后,会调用一个$.TO函数,返回的结果你是怎么分析出来,是一个md5(d."vvl81")的,我只看到了定义了C="vvl",然后他手动添加了"8",最后一个"1"是怎么知道的。
主要是网站现在已经改了,在后面加个"1"再md5已经和网站的数据不一样了。
一次不好完成的curl.
截图:
<?php
/**
 * Looks up a Taobao buyer's reputation through 131458.com in two requests:
 *   1. POST the escaped nickname to load.aspx/Load to obtain a seed value `d`
 *      (session cookies from this request are stored in $cookie_file);
 *   2. GET TaobaoInfo.ashx with token = md5(d . suffix) . '_' . d, sending the
 *      stored cookies back.
 * The raw response of step 2 is printed HTML-escaped.
 */

$tb_nick_name = '1心一易';
$cookie_file  = './cookies.txt';

// Suffix appended to the seed before hashing. The site's script has been
// reported to change this value over time, so keep it in one place and
// re-check it against the site's current JavaScript when tokens stop matching.
$token_suffix = 'vvl81';

// Step 1: request the seed.
$post     = array('nick' => escape($tb_nick_name));
$response = _curl_fetch(
    'http://www.131458.com/handler/load.aspx/Load',
    $cookie_file,
    json_encode($post)
);

$d = null;
if ($response !== false) {
    $decoded = json_decode($response);
    if (is_object($decoded) && isset($decoded->d)) {
        $d = $decoded->d;
    }
}

if ($d === null) {
    // No exit() here: report the problem and simply skip step 2.
    echo 'Error: could not obtain the token seed from load.aspx/Load';
} else {
    // Step 2: build the token and query the reputation data.
    $_token = md5($d . $token_suffix);
    $url    = 'http://www.131458.com/handler/TaobaoInfo.ashx?nickCode=' . escape($tb_nick_name)
            . '&token=' . $_token . '_' . $d
            . '&_=' . time() . rand(100, 999); // seconds + 3 random digits, shaped like a ms timestamp

    $contents = _curl_fetch($url, $cookie_file);
    if ($contents === false) {
        echo 'Error: request to TaobaoInfo.ashx failed';
    } else {
        echo htmlspecialchars($contents);
    }
}

/**
 * Performs one HTTP request with the headers used by this script.
 *
 * @param string      $url         Target URL.
 * @param string      $cookie_file Cookie file path; written to (cookie jar) on
 *                                 POST, read from on GET.
 * @param string|null $post_body   Request body; when not null the request is a
 *                                 POST that follows redirects, otherwise a GET.
 * @return string|false Response body, or false when the transfer failed.
 */
function _curl_fetch($url, $cookie_file, $post_body = null)
{
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    $header = array(
        'Content-Type: application/json; charset=UTF-8',
        'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36',
    );
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Bound the wait so an unreachable host cannot hang the script.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    if ($post_body !== null) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post_body);
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    } else {
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
    }

    $body = curl_exec($curl);
    curl_close($curl);
    // curl_close() has no effect on PHP 8+; releasing the handle is what lets
    // libcurl write the cookie jar before the next request reads it.
    unset($curl);

    return $body;
}

/**
 * PHP counterpart of JavaScript's escape().
 *
 * The input is treated as UTF-8 and converted to UTF-16 code units:
 *   - letters, digits and - _ . + @ / * are copied unchanged;
 *   - other code units up to 0xFF become %XX (two upper-case hex digits);
 *   - code units above 0xFF become %uXXXX (characters outside the BMP are
 *     emitted as two %uXXXX surrogate escapes).
 * If the input is not valid UTF-8, each byte is encoded as its own unit.
 *
 * @param string $string Text to encode.
 * @return string Escaped text.
 */
function escape($string)
{
    $string = (string) $string;
    if ($string === '') {
        return '';
    }

    $special = "-_.+@/*0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // An explicit big-endian target keeps the result independent of the host
    // byte order, so no PHP_OS special-casing is needed.
    $utf16 = @iconv('UTF-8', 'UTF-16BE', $string);
    $units = ($utf16 === false)
        ? unpack('C*', $string)   // invalid UTF-8: fall back to raw bytes
        : unpack('n*', $utf16);   // one 16-bit code unit per element

    $output = '';
    foreach ($units as $unit) {
        if ($unit > 0xFF) {
            $output .= sprintf('%%u%04X', $unit);
        } elseif (strpos($special, chr($unit)) !== false) {
            $output .= chr($unit);
        } else {
            $output .= sprintf('%%%02X', $unit);
        }
    }

    return $output;
}
|
Nach dem Login kopieren
Nach dem Login kopieren
Nach dem Login kopieren
非常感谢你。
我现在有一个问题,就是获取了d之后,会调用一个$.TO函数,返回的结果你是怎么分析出来,是一个md5(d."vvl81")的,我只看到了定义了C="vvl",然后他手动添加了"8",最后一个"1"是怎么知道的。
主要是网站现在已经改了,在后面加个"1"再md5已经和网站的数据不一样了。
你可以在console里alert($.To);
return e = 2147483648 & a, f = 2147483648 & b, c = 1073741824
你看下他引用的jquery文件,在最后加了个函数。
看到这几个数字就知道是MD5加密了,再把代码格式化一下,可以看出后面加了个"1"。
要是被改了估计被发现采集了吧
你看下这网站定义的那个函数,现在变成了
_token=$.To(data.d+c+ "822")+"_"+data.d
所以还是一样的,
这里的值每天都在变,你不要写固定了
_token=$.To(data.d+c+ "822")+"_"+data.d
谢谢大家,已经写好了。 特别感谢u012771039,现在结贴