脚本藏身之处不过有四: 1、<script>标签、<link>标签、<style>标签、<iframe>标签 2、on开头的标签属性 3、javascript(vbscript)伪协议 4、css的expression</p> <p>下面是它们的字符串规则: <br>1、<(script|link|style|iframe)(.|\n)*<\/\1>\s* <br>2、\s*on[a-z]+\s*=\s*("[^"]+"|'[^']+'|[^\s]+)\s*(?=>) <br>3、\s*(href|src)\s*=\s*("\s*(javascript|vbscript):[^"]+"|'\s*(javascript|vbscript):[^']+'|(javascript|vbscript):[^\s]+)\s*(?=>) <br>4、expression\((.|\n)*\);? <br>了解它们的规则后,抓虫行动就水到渠成。 </p><div class="code" style="position:relative; padding:0px; margin:0px;"><pre class="brush:js;toolbar:false"><textarea id="bug" cols="80" rows="16"> <button id="kick">抓虫1</button> <script> function kickBug(str) { return str.replace(/<(script|link|style|iframe)(.|\n)*\/\1>\s*/ig,""); } a { height:expression(alert('hei')); } 抓虫1 function kickBug(str) { return str.replace(/<(script|link|style|iframe)(.|\n)*\/\1>\s*/ig,""); } if(!/msie/i.test(navigator.userAgent)){ HTMLElement.prototype.__defineGetter__("innerText",function(){ return this.textContent; }); HTMLElement.prototype.__defineSetter__("innerText",function(text){ this.textContent = text; }); } document.getElementById("kick").onclick = function() { var bug = document.getElementById("bug"); bug.innerText = kickBug(bug.innerText); } 로그인 후 복사 test 抓虫2 function kickBug(str) { return str.replace(/<[a-z][^>]*\s*on[a-z]+\s*=[^>]+/ig,function($0,$1){ return $0.replace(/\s*on[a-z]+\s*=\s*("[^"]+"|'[^']+'|[^\s]+)\s*/ig,""); }); } if(!/msie/i.test(navigator.userAgent)){ HTMLElement.prototype.__defineGetter__("innerText",function(){ return this.textContent; }); HTMLElement.prototype.__defineSetter__("innerText",function(text){ this.textContent = text; }); } document.getElementById("kick").onclick = function() { var bug = document.getElementById("bug"); bug.innerText = kickBug(bug.innerText); } 로그인 후 복사 test 抓虫3 function kickBug(str) { return str.replace(/<[a-z][^>]*\s*(href|src)\s*=[^>]+/ig,function($0,$1){ $0 = $0.replace(/&#(6[5-9]|[78][0-9]|9[0789]|1[01][0-9]|12[012]);?/g,function($0,$1){return 
String.fromCharCode($1);}); return $0.replace(/\s*(href|src)\s*=\s*("\s*(javascript|vbscript):[^"]+"|'\s*(javascript|vbscript):[^']+'|(javascript|vbscript):[^\s]+)/ig,""); }); } if(!/msie/i.test(navigator.userAgent)){ HTMLElement.prototype.__defineGetter__("innerText",function(){ return this.textContent; }); HTMLElement.prototype.__defineSetter__("innerText",function(text){ this.textContent = text; }); } document.getElementById("kick").onclick = function() { var bug = document.getElementById("bug"); bug.innerText = kickBug(bug.innerText); } 로그인 후 복사 expression() test 抓虫4 function kickBug(str) { return str.replace(/<[a-z][^>]*\s*style\s*=[^>]+/ig,function($0,$1){ $0 = $0.replace(/&#(6[5-9]|[78][0-9]|9[0789]|1[01][0-9]|12[012]);?/g,function($0,$1){return String.fromCharCode($1);}); return $0.replace(/\s*style\s*=\s*("[^"]+(expression)[^"]+"|'[^']+\2[^']+'|[^\s]+\2[^\s]+)\s*/ig,""); }); } if(!/msie/i.test(navigator.userAgent)){ HTMLElement.prototype.__defineGetter__("innerText",function(){ return this.textContent; }); HTMLElement.prototype.__defineSetter__("innerText",function(text){ this.textContent = text; }); } document.getElementById("kick").onclick = function() { var bug = document.getElementById("bug"); bug.innerText = kickBug(bug.innerText); } 로그인 후 복사 这样调用就可以:k1(k2(k3(k4(str))))(k1~k4 依次对应上面四个 kickBug 函数)。这样就是单纯地过滤脚本而已,所谓过滤“危险脚本”应该是能够判断哪些属于“危险”脚本,不危险的就不过滤才对……那可就难办了,相当于防火墙了。