1. The PHP class for page extraction: it mainly fetches the HTML content of a URL and then matches parts of that content with regular expressions.
// Retained for backward compatibility: code loaded after this file may rely on
// StringBuilder being available. CutPage itself does not use it.
include 'StringBuilder.php';

/**
 * Fetches the text of a page (local path or URL) and extracts pieces of it
 * with caller-supplied regular expressions.
 *
 * Every pattern passed to the match* methods must be a complete PCRE pattern
 * (with delimiters) containing at least one capture group; the first group is
 * the value that gets returned.
 */
class CutPage
{
    public function __construct()
    {
    }

    /**
     * Read the entire content of a file or URL into a string.
     *
     * On open failure the script terminates with the message "文件打开失败!"
     * (historical behaviour that existing callers may depend on).
     *
     * @param string $url Path or URL understood by fopen().
     * @return string Full content; an empty string if the stream opened but
     *                could not be read.
     */
    public function getAllContent($url)
    {
        $handle = fopen($url, 'r');
        if ($handle === false) {
            die("文件打开失败!");
        }

        // stream_get_contents() reads until EOF in one call. Unlike a
        // feof()/fgets() loop it cannot spin forever when a read fails
        // without the stream reaching EOF.
        $content = stream_get_contents($handle);
        fclose($handle);

        return $content === false ? '' : $content;
    }

    /**
     * Return the first capture group of the first match of $regex_title.
     *
     * Prints "匹配失败!" and returns null when the pattern does not match or
     * has no first capture group.
     *
     * @param string $content     Text to search.
     * @param string $regex_title PCRE pattern of the form xxxx(xx)xxx.
     * @return string|null Captured title, or null on failure.
     */
    public function matchContentTitle($content, $regex_title)
    {
        // A single preg_match() with a $matches array yields the capture
        // directly; there is no need to split the whole document afterwards.
        if (preg_match($regex_title, $content, $matches) === 1 && isset($matches[1])) {
            return $matches[1];
        }

        echo "匹配失败!";
        return null;
    }

    /**
     * Return the first capture group of every match of $regex_chapter.
     *
     * Prints "匹配失败!" and returns null when nothing matches or the pattern
     * has no first capture group.
     *
     * @param string $content       Text to search.
     * @param string $regex_chapter PCRE pattern with one capture group.
     * @return array|null List of captured chapter strings, or null on failure.
     */
    public function matchContentChapter($content, $regex_chapter)
    {
        // preg_match_all() returns the match count (0 when none) or false on
        // a pattern error; both are treated as a failed match.
        $found = preg_match_all($regex_chapter, $content, $matches);
        if ($found && isset($matches[1])) {
            return $matches[1];
        }

        echo "匹配失败!";
        return null;
    }
}