/**
 * Determine whether the current request comes from a search-engine spider
 * or other automated client, based on the User-Agent request header.
 *
 * The header is compared case-insensitively against a fixed list of known
 * crawler signatures; any substring match counts as a crawler.
 *
 * @author Eddy
 * @return bool True when the User-Agent contains a known signature; false
 *              when it does not, or when the header is missing or empty.
 */
function isCrawler()
{
    // The header is optional: CLI runs and some clients do not send it at all.
    $agent = isset($_SERVER['HTTP_USER_AGENT']) && is_string($_SERVER['HTTP_USER_AGENT'])
        ? $_SERVER['HTTP_USER_AGENT']
        : '';

    if ($agent === '') {
        return false;
    }

    $spiderSite = array(
        "TencentTraveler",
        "Baiduspider+",
        "BaiduGame",
        "Googlebot",
        "msnbot",
        "Sosospider+",
        "Sogou web spider",
        "ia_archiver",
        "Yahoo! Slurp",
        "YoudaoBot",
        "Yahoo Slurp",
        "MSNBot",
        "Java (Often spam bot)",
        "BaiDuSpider",
        "Voila",
        "Yandex bot",
        "BSpider",
        "twiceler",
        "Sogou Spider",
        "Speedy Spider",
        "Google AdSense",
        "Heritrix",
        "Python-urllib",
        "Alexa (IA Archiver)",
        "Ask",
        "Exabot",
        "Custo",
        "OutfoxBot/YodaoBot",
        "yacy",
        "SurveyBot",
        "legs",
        "lwp-trivial",
        "Nutch",
        "StackRambler",
        "The web archive (IA Archiver)",
        "Perl tool",
        "MJ12bot",
        "Netcraft",
        "MSIECrawler",
        "WGet tools",
        "larbin",
        "Fish search",
    );

    foreach ($spiderSite as $signature) {
        // stripos() may legitimately return 0 (match at the very start),
        // so the result must be compared against false explicitly.
        if (stripos($agent, $signature) !== false) {
            return true;
        }
    }

    // No signature matched: return an explicit false instead of falling off
    // the end of the function (which would yield null).
    return false;
}
On en trouve beaucoup en ligne, mais ils sont tous recopiés les uns sur les autres et ne sont pas assez complets. J'ai compilé ici un code plus complet :
/**
 * Check the current request's User-Agent header for generic crawler markers
 * ("bot", "spider", "slurp", ...) and run a hook when one is found.
 *
 * This is a template: the branch marked "do something" is where the
 * crawler-specific handling is meant to be inserted. The detection result is
 * not exposed to the caller.
 *
 * @return string Always an empty string.
 */
function is_spider()
{
    // The header is optional; treat a missing one as an empty string instead
    // of triggering an undefined-index warning.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT'])
        ? strtolower((string) $_SERVER['HTTP_USER_AGENT'])
        : '';

    $markers = array(
        "bot",
        "spider",
        "slurp",
        "mediapartners-google",
        "fast-webcrawler",
        "altavista",
        "ia_archiver",
    );

    $robot = 0;
    foreach ($markers as $marker) {
        // strpos() returns 0 when the marker sits at the very start of the
        // string; a bare truthiness check would miss that case, so compare
        // against false explicitly.
        if (strpos($userAgent, $marker) !== false) {
            $robot = 1;
            break;
        }
    }

    if ($robot == 1) {
        //do something
    }

    return '';
}