这种防爬虫机制怎么过?

WBOY
Libérer: 2016-06-06 20:10:52
original
1968 Les gens l'ont consulté

这个网站 freeget.co/ 带了防爬机制,但不知道这是哪一种防爬机制,这么厉害。

不带header头返回的是

<code>

<meta name="robots" content="noindex,nofollow">
<script>
/*
 * Self-invoking script that probes the browser environment and records the
 * result in a short-lived cookie.
 *
 * getSessionCookies(): splits document.cookie on ";" and returns the values
 *   of the cookies whose name matches /^\s?incap_ses_/.
 * simpleDigest(mystr): returns the sum of the char codes of mystr.
 * setIncapCookie(vArray): computes simpleDigest(vArray + cookie) for each
 *   session cookie and stores vArray + ",digest=" + <comma-joined digests>
 *   in the "___utmvc" cookie; if that computation throws, the URI-encoded
 *   error text is stored in place of the digests.
 * createCookie(name, value, seconds): writes name=value to document.cookie
 *   with path "/" and, when seconds is truthy, an expiry that many seconds
 *   ahead (20 is passed for "___utmvc").
 * test(o): o is a list of [expression, mode] pairs. Each pair may append
 *   URI-encoded "key=value" strings to a list, which is returned comma-joined.
 *   Modes "exists", "value" and "value_or_nil" eval() the expression; modes
 *   "plugin", "plugin_extentions" and "plugins" read navigator.plugins and do
 *   not use the expression.
 *
 * Finally an <img> element is created whose src is
 * "/_Incapsula_Resource?SWKMTFSR=1&e=" followed by Math.random(), or by the
 * caught exception if any step above threw.
 *
 * NOTE(review): the catch branch of "value_or_nil" references test_type,
 * which is not declared anywhere in this snippet; unless it is defined
 * elsewhere on the page, reaching that branch raises a ReferenceError that
 * ends up in the outermost try/catch.
 * NOTE(review): several names (cookieArray, key, value, cookies, digests,
 * res inside setIncapCookie, extentions, filename, ext, p, pres, a, f, img)
 * are assigned without var - presumably they become globals in non-strict
 * mode; confirm before reusing this code.
 */
(function() {  function getSessionCookies() {   cookieArray = new Array();   var cName = /^\s?incap_ses_/;   var c = document.cookie.split(";");   for (var i = 0; i < c.length; i++) {    key = c[i].substr(0, c[i].indexOf("="));    value = c[i].substr(c[i].indexOf("=") + 1, c[i].length);    if (cName.test(key)) {     cookieArray[cookieArray.length] = value    }   }   return cookieArray  }  function setIncapCookie(vArray) {   try {    cookies = getSessionCookies();    digests = new Array(cookies.length);    for (var i = 0; i < cookies.length; i++) {     digests[i] = simpleDigest((vArray) + cookies[i])    }    res = vArray + ",digest=" + (digests.join())   } catch (e) {    res = vArray + ",digest=" + (encodeURIComponent(e.toString()))   }   createCookie("___utmvc", res, 20)  }  function simpleDigest(mystr) {   var res = 0;   for (var i = 0; i < mystr.length; i++) {    res += mystr.charCodeAt(i)   }   return res  }  function createCookie(name, value, seconds) {   if (seconds) {    var date = new Date();    date.setTime(date.getTime() + (seconds * 1000));    var expires = "; expires=" + date.toGMTString()   } else {    var expires = ""   }   document.cookie = name + "=" + value + expires + "; path=/"  }  function test(o) {   var res = "";   var vArray = new Array();   for (var j = 0; j < o.length; j++) {    var test = o[j][0]    switch (o[j][1]) {    case "exists":     try {      if(typeof(eval(test)) != "undefined"){         vArray[vArray.length] = encodeURIComponent(test + "=true")      }      else{         vArray[vArray.length] = encodeURIComponent(test + "=false")      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=false")     }     break;    case "value":     try {      vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;     case "value_or_nil":         try{             if(typeof(eval(test)) != "undefined"){     
            vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())             }             else{                 vArray[vArray.length] = encodeURIComponent(test + "=nil")             }                     }         catch(e){             vArray[vArray.length] = encodeURIComponent(test + ":" + test_type + "=" + e)         }         break;     case "plugin_extentions":         try {             extentions = []             try {                 i = extentions.indexOf("i")             } catch (e) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=indexOf is not a function")                 break;             }                         for (var i=0;i<navigator.plugins.length;i++) {                 if (typeof(navigator.plugins[i]) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=plugins[i] is undefined");                     break;                 }                                 filename = navigator.plugins[i].filename                 if (typeof(filename) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=filename is undefined");                     break;                 }                 if (filename.split(".").length == 2) {                     ext = filename.split(".")[1]                     if (extentions.indexOf(ext) < 0) {                         extentions.push(ext)                     }                                 }                 }                          for    (i = 0; i < extentions.length; i++) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=" + extentions[i]);             }         } catch (e) {          vArray[vArray.length] = encodeURIComponent("plugin_ext=" + e)         }         break;     case "plugins":     try{         p=navigator.plugins         pres=""         for (a in p){pres+=(p[a]['description']+" ").substring(0,20)}         vArray[vArray.length] = encodeURIComponent("plugins=" + 
pres);         }     catch(e){         vArray[vArray.length] = encodeURIComponent("plugins=" +e);         }     break;      case "plugin":     try {      a = navigator.plugins;      for (i in a) {       f = a[i]["filename"].split(".");       if (f.length == 2) {        vArray[vArray.length] = encodeURIComponent("plugin=" + f[1]);        break       }      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent("plugin=" + e)     }     break    }   }   vArray = vArray.join();   return vArray  }  var o = [   ["navigator", "exists"],   ["navigator.vendor", "value"],   ["navigator.vendor", "value_or_nil"],   ["opera", "exists"],   ["ActiveXObject", "exists"],   ["navigator.appName", "value"],   ["navigator.appName", "value_or_nil"],   ["platform", "plugin"],   ["platform", "plugin_extentions"],   ["webkitURL", "exists"],   ["navigator.plugins.length==0", "value"],   ["navigator.plugins.length==0", "value_or_nil"],   ["_phantom", "exists"] ];  try {   setIncapCookie(test(o));   document.createElement("img").src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + Math.random()  } catch (e) {   img = document.createElement("img");   img.src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + e  } })();
</script>
<script>
/*
 * Self-invoking loader: walks the string b two characters at a time, parses
 * each pair as a base-16 number, joins the numbers with commas into z, builds
 * a string with String.fromCharCode(<z>) via eval, and evals that string.
 *
 * NOTE(review): as captured here the snippet does not parse - "var bfor ("
 * is missing the initializer of b and the ";" before the for loop. The hex
 * payload was presumably dropped when the page was copied; it cannot be
 * recovered from this text.
 */
(function() { 
var z="";var bfor (var i=0;i<b.length;i+=2){z=z+parseInt(b.substring(i, i+2), 16)+",";}z = z.substring(0,z.length-1); eval(eval('String.fromCharCode('+z+')'));})();
</script>

<iframe style="display:none;visibility:hidden;" src="//content.incapsula.com/jsTest.html" id="gaIframe"></iframe>
</code>
Copier après la connexion
Copier après la connexion

还有,下面是带了 header 头时返回的内容:

<code>
    
    
        <meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
        <meta name="format-detection" content="telephone=no">
        <meta name="viewport" content="initial-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    
    
        <iframe src="/_Incapsula_Resource?CWUDNSAI=9&xinfo=6-56800294-0%200NNN%20RT(1460895856602%200)%20q(0%20-1%20-1%20-1)%20r(0%20-1)%20B12(4,315,0)%20U10000&incident_id=406000510241030862-742862014603723574&edet=12&cinfo=04000000" frameborder="0" width="100%" height="100%" marginheight="0px" marginwidth="0px">
            Request unsuccessful. Incapsula incident ID: 406000510241030862-742862014603723574
        </iframe>
    

</code>
Copier après la connexion
Copier après la connexion

回复内容:

这个网站 freeget.co/ 带了防爬机制,但不知道这是哪一种防爬机制,这么厉害。

不带header头返回的是

<code>

<meta name="robots" content="noindex,nofollow">
<script>
/*
 * Self-invoking script that probes the browser environment and records the
 * result in a short-lived cookie.
 *
 * getSessionCookies(): splits document.cookie on ";" and returns the values
 *   of the cookies whose name matches /^\s?incap_ses_/.
 * simpleDigest(mystr): returns the sum of the char codes of mystr.
 * setIncapCookie(vArray): computes simpleDigest(vArray + cookie) for each
 *   session cookie and stores vArray + ",digest=" + <comma-joined digests>
 *   in the "___utmvc" cookie; if that computation throws, the URI-encoded
 *   error text is stored in place of the digests.
 * createCookie(name, value, seconds): writes name=value to document.cookie
 *   with path "/" and, when seconds is truthy, an expiry that many seconds
 *   ahead (20 is passed for "___utmvc").
 * test(o): o is a list of [expression, mode] pairs. Each pair may append
 *   URI-encoded "key=value" strings to a list, which is returned comma-joined.
 *   Modes "exists", "value" and "value_or_nil" eval() the expression; modes
 *   "plugin", "plugin_extentions" and "plugins" read navigator.plugins and do
 *   not use the expression.
 *
 * Finally an <img> element is created whose src is
 * "/_Incapsula_Resource?SWKMTFSR=1&e=" followed by Math.random(), or by the
 * caught exception if any step above threw.
 *
 * NOTE(review): the catch branch of "value_or_nil" references test_type,
 * which is not declared anywhere in this snippet; unless it is defined
 * elsewhere on the page, reaching that branch raises a ReferenceError that
 * ends up in the outermost try/catch.
 * NOTE(review): several names (cookieArray, key, value, cookies, digests,
 * res inside setIncapCookie, extentions, filename, ext, p, pres, a, f, img)
 * are assigned without var - presumably they become globals in non-strict
 * mode; confirm before reusing this code.
 */
(function() {  function getSessionCookies() {   cookieArray = new Array();   var cName = /^\s?incap_ses_/;   var c = document.cookie.split(";");   for (var i = 0; i < c.length; i++) {    key = c[i].substr(0, c[i].indexOf("="));    value = c[i].substr(c[i].indexOf("=") + 1, c[i].length);    if (cName.test(key)) {     cookieArray[cookieArray.length] = value    }   }   return cookieArray  }  function setIncapCookie(vArray) {   try {    cookies = getSessionCookies();    digests = new Array(cookies.length);    for (var i = 0; i < cookies.length; i++) {     digests[i] = simpleDigest((vArray) + cookies[i])    }    res = vArray + ",digest=" + (digests.join())   } catch (e) {    res = vArray + ",digest=" + (encodeURIComponent(e.toString()))   }   createCookie("___utmvc", res, 20)  }  function simpleDigest(mystr) {   var res = 0;   for (var i = 0; i < mystr.length; i++) {    res += mystr.charCodeAt(i)   }   return res  }  function createCookie(name, value, seconds) {   if (seconds) {    var date = new Date();    date.setTime(date.getTime() + (seconds * 1000));    var expires = "; expires=" + date.toGMTString()   } else {    var expires = ""   }   document.cookie = name + "=" + value + expires + "; path=/"  }  function test(o) {   var res = "";   var vArray = new Array();   for (var j = 0; j < o.length; j++) {    var test = o[j][0]    switch (o[j][1]) {    case "exists":     try {      if(typeof(eval(test)) != "undefined"){         vArray[vArray.length] = encodeURIComponent(test + "=true")      }      else{         vArray[vArray.length] = encodeURIComponent(test + "=false")      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=false")     }     break;    case "value":     try {      vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;     case "value_or_nil":         try{             if(typeof(eval(test)) != "undefined"){     
            vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())             }             else{                 vArray[vArray.length] = encodeURIComponent(test + "=nil")             }                     }         catch(e){             vArray[vArray.length] = encodeURIComponent(test + ":" + test_type + "=" + e)         }         break;     case "plugin_extentions":         try {             extentions = []             try {                 i = extentions.indexOf("i")             } catch (e) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=indexOf is not a function")                 break;             }                         for (var i=0;i<navigator.plugins.length;i++) {                 if (typeof(navigator.plugins[i]) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=plugins[i] is undefined");                     break;                 }                                 filename = navigator.plugins[i].filename                 if (typeof(filename) == "undefined") {                     vArray[vArray.length] = encodeURIComponent("plugin_ext=filename is undefined");                     break;                 }                 if (filename.split(".").length == 2) {                     ext = filename.split(".")[1]                     if (extentions.indexOf(ext) < 0) {                         extentions.push(ext)                     }                                 }                 }                          for    (i = 0; i < extentions.length; i++) {                 vArray[vArray.length] = encodeURIComponent("plugin_ext=" + extentions[i]);             }         } catch (e) {          vArray[vArray.length] = encodeURIComponent("plugin_ext=" + e)         }         break;     case "plugins":     try{         p=navigator.plugins         pres=""         for (a in p){pres+=(p[a]['description']+" ").substring(0,20)}         vArray[vArray.length] = encodeURIComponent("plugins=" + 
pres);         }     catch(e){         vArray[vArray.length] = encodeURIComponent("plugins=" +e);         }     break;      case "plugin":     try {      a = navigator.plugins;      for (i in a) {       f = a[i]["filename"].split(".");       if (f.length == 2) {        vArray[vArray.length] = encodeURIComponent("plugin=" + f[1]);        break       }      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent("plugin=" + e)     }     break    }   }   vArray = vArray.join();   return vArray  }  var o = [   ["navigator", "exists"],   ["navigator.vendor", "value"],   ["navigator.vendor", "value_or_nil"],   ["opera", "exists"],   ["ActiveXObject", "exists"],   ["navigator.appName", "value"],   ["navigator.appName", "value_or_nil"],   ["platform", "plugin"],   ["platform", "plugin_extentions"],   ["webkitURL", "exists"],   ["navigator.plugins.length==0", "value"],   ["navigator.plugins.length==0", "value_or_nil"],   ["_phantom", "exists"] ];  try {   setIncapCookie(test(o));   document.createElement("img").src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + Math.random()  } catch (e) {   img = document.createElement("img");   img.src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + e  } })();
</script>
<script>
/*
 * Self-invoking loader: walks the string b two characters at a time, parses
 * each pair as a base-16 number, joins the numbers with commas into z, builds
 * a string with String.fromCharCode(<z>) via eval, and evals that string.
 *
 * NOTE(review): as captured here the snippet does not parse - "var bfor ("
 * is missing the initializer of b and the ";" before the for loop. The hex
 * payload was presumably dropped when the page was copied; it cannot be
 * recovered from this text.
 */
(function() { 
var z="";var bfor (var i=0;i<b.length;i+=2){z=z+parseInt(b.substring(i, i+2), 16)+",";}z = z.substring(0,z.length-1); eval(eval('String.fromCharCode('+z+')'));})();
</script>

<iframe style="display:none;visibility:hidden;" src="//content.incapsula.com/jsTest.html" id="gaIframe"></iframe>
</code>
Copier après la connexion
Copier après la connexion

还有,下面是带了 header 头时返回的内容:

<code>
    
    
        <meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
        <meta name="format-detection" content="telephone=no">
        <meta name="viewport" content="initial-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    
    
        <iframe src="/_Incapsula_Resource?CWUDNSAI=9&xinfo=6-56800294-0%200NNN%20RT(1460895856602%200)%20q(0%20-1%20-1%20-1)%20r(0%20-1)%20B12(4,315,0)%20U10000&incident_id=406000510241030862-742862014603723574&edet=12&cinfo=04000000" frameborder="0" width="100%" height="100%" marginheight="0px" marginwidth="0px">
            Request unsuccessful. Incapsula incident ID: 406000510241030862-742862014603723574
        </iframe>
    

</code>
Copier après la connexion
Copier après la connexion

如果你搜搜Incapsula,你就会知道答案。

和CloudFlare一样,对访客进行分析,判断好坏。

如果你想知道怎么过,那么最简单的方法就是模拟一次真实访问。

也就是使用相同的 IP 和相同的 UA,先自己访问一次,获取到所有 Cookie,再把这些 Cookie 用在爬虫上。

Étiquettes associées:
php
source:php.cn
Déclaration de ce site Web
Le contenu de cet article est volontairement contribué par les internautes et les droits d'auteur appartiennent à l'auteur original. Ce site n'assume aucune responsabilité légale correspondante. Si vous trouvez un contenu suspecté de plagiat ou de contrefaçon, veuillez contacter admin@php.cn
Tutoriels populaires
Plus>
Derniers téléchargements
Plus>
effets Web
Code source du site Web
Matériel du site Web
Modèle frontal