/**
 * Compute a rough weight for one search result from its URL depth and rank.
 *
 * The weight starts from a per-position base value (position within a page of
 * ten results), is halved for every extra path segment and every query-string
 * parameter in the URL, and is divided by ten for each successive page of ten
 * results.
 *
 * @param string $h URL of the search result; the scheme and "#fragment" are ignored.
 * @param int    $i Ranking of the result (expected to be a positive, 1-based integer).
 *
 * @return string The weight formatted by number_format() with one decimal place.
 */
function get_dm_weight($h, $i)
{
    // Which page (of ten results each) the ranking falls on.
    $p = ceil($i / 10);

    // Base weight keyed by ($i % 10); key 0 is the tenth slot of a page.
    $i_weight = array(1 => 52, 2 => 15, 3 => 10, 4 => 5, 5 => 5, 6 => 4, 7 => 1, 8 => 3, 9 => 2, 0 => 3);

    // Drop a leading "scheme://". "~" is used as the delimiter because the
    // pattern itself contains "/". The scheme is matched strictly so that a
    // "://" appearing later (e.g. inside a query string of a scheme-less URL)
    // does not cause the host to be stripped.
    $h_str = preg_replace('~^[a-z][a-z0-9+.\-]*://~i', '', (string) $h);

    // Drop the "#fragment" part, whether or not a scheme was present.
    $hash_pos = strpos($h_str, '#');
    if ($hash_pos !== false) {
        $h_str = substr($h_str, 0, $hash_pos);
    }

    // Split into "host/path" and the optional query string.
    $h_arr = explode('?', $h_str, 2);

    // Depth of the host/path part; a trailing "/" does not add a level.
    $h0_arr = explode('/', $h_arr[0]);
    $level_l = count($h0_arr);
    if ($h0_arr[$level_l - 1] === '') {
        $level_l--;
    }

    // Number of "&"-separated parameters in the query string, if any.
    $level_r = 0;
    if (isset($h_arr[1]) && $h_arr[1] !== '') {
        $level_r = count(explode('&', $h_arr[1]));
    }

    // A bare host counts as depth 1 (exponent 0). Clamp at 0 so an empty
    // URL cannot yield a negative exponent and an inflated weight.
    $depth = max(0, $level_l + $level_r - 1);

    $dw = $i_weight[$i % 10] * pow(0.5, $depth) * 9 / pow(10, $p);

    return number_format($dw, 1);
}
function fetch_baidu($d, $k)
{
$urlw = urlencode(iconv("utf-8","gbk//ignore",$k));
$max_srh_page = 2; //百度搜索结果50条/页,提取2页,也就是只在前100条搜索结果中检查排名,最大值可以设为16
$baidu_ids = array(); //存储百度系列子站点占据的排名位置
$baidu_mus = array(); //存储百度开放平台等优质站点占据的排名位置
$isrank = 0; //$isrank = 1 当前域名下这个关键词获得排名; $isrank = 0 当前域名下这个关键词没有排名
$all_count = 0; //测试变量,以确认匹配规则不会遗漏任何一条搜索结果
$dm_weight = 0; //分析搜索结果页面中顶级、次级、目录、内页的情况,粗略反映一个关键字的竞争激烈程度,非常不准,仅供参考
for ($page_no = 1; $page_no <= $max_srh_page; $page_no++)
{
if ($page_no > 16) break;
$fail_try = 1;
$pn = ($page_no - 1) * 50;
$url = "http://www.baidu.com/s?wd={$urlw}&pn={$pn}&rn=50";
$snoopy = new snoopy;
// $snoopy->proxy_host = "127.0.0.1"; //采集可选代理ip,以免频繁抓百度反被百度咬
// $snoopy->proxy_port = "80"; //proxy代理所用端口
$snoopy->fetch($url);
$contents = iconv("gbk","utf-8//ignore",$snoopy->results);
unset($snoopy);
// echo $contents;
if (!preg_match("/此内容系百度根据您的指令自动搜索的结果/i",$contents,$out))
{
if ($fail_try > 5)
{
continue;
}
else
{
$fail_try++;
$page_no--;
sleep(30);
continue;
}
}
if (!isset($ebaidu))
{
$ebaidu = array('lt' => 0, 'lb' => 0, 'r' => 0); //记录百度推广数量,分为左上、左下、右侧
if (preg_match_all("/ {
$ebaidu['lt'] = count($out_lt[0]);
}
if (preg_match_all("/