PHP采集类-php教程-PHP中文網

首頁

後端開發

php教程

PHP采集类

WBOY

Jun 23, 2016 pm 02:34 PM

PHP代码

URL ); return $pageContent; } /** * 根据标记得到列表段 * @param $content 页面源数据 * @return String 列表段内容 */ function getContentPiece ( $content ) { $content = $this->getContent( $content, $this->startFlag, $this->endFlag ); if(!$content) $content=$this->cut ($content, $this->startFlag, $this->endFlag ); return $content; } /** * 得到一个字符串中的某一部分 * @param $sourceStr 源数据 * @param $startStr 分离部分的开始标记 * @param $endStart 分离部分的结束标记 * @return boolean 操作成功返回true */ function getContent ( $sourceStr, $startStr, $endStart ) { $s = preg_quote( decode( $startStr ) ); $e = preg_quote( decode( $endStart ) ); $s = str_replace( " ", "[[:space:]]", $s ); $e = str_replace( " ", "[[:space:]]", $e ); $s = str_replace( "\r\n", "[[:cntrl:]]", $s ); $e = str_replace( "\r\n", "[[:cntrl:]]", $e ); preg_match_all( "@" . $s . "(.*?)". $e ."@is", $sourceStr, $tpl ); $content = $tpl[1]; $content = implode( "", $content ); return $content; } function cut ( $sourceStr, $startStr, $endStr ) { return cut( $sourceStr ,decode( $startStr ) ,decode( $endStr) ); } /** * 得到只含有连接和内容的列表数组 * @param $sList 页面列表源数据 * @return array 列表段内容 */ function getSourceList ( $sList ) { preg_match_all( "//i", $sList, $list ); $list = $list[0]; //foreach($list as $l) echo $l; if(!$list || !is_array($list)){ return $this->getSourceListExtend($sList); }else{ return $this->getList ( $list ); } } function getSourceListExtend($sList) { $content=explode("",$sList); for($i=0;$iGetListExtend( $list ); } /** * 得到列表内容 * @param $list 列表段内容 * @return array 含有标题和连接的数组 */ function getList ( $list ) { for ( $i = 0; $i (.*?)/i", $list[$i], $templ ); //content preg_match_all( "/href=(\"|'|)(.*?)(\"|'|)/i", $list[$i], $tempc ); //获取的数据正确 if( !emptyempty( $templ[1][0] ) && !emptyempty( $tempc[2][0] ) ) { if( 0 == strpos( $tempc[2][0], "/" ) ) { preg_match( "@http://(.*?)/@i", $this->URL, $url ); $tempc[2][0] = substr( $url[0], 0, strlen( $url[0] ) - 1 ) . 
$tempc[2][0]; } $listContent[$i][0] = $templ[1][0]; $listContent[$i][1] = $tempc[2][0]; } } if(!$listContent || !is_array($listContent)){ return $this->GetListExtend ( $list ); }else{ return $listContent; } } function GetListExtend ( $list ) { $list=str_replace("\"","",$list); $list=str_replace("'","",$list); $list=str_replace("=","",$list); for ( $i = 0; $i cut($list[$i],"href"," "); echo $temp_link."
"; //title if(eregi(">",$list[$i])){ $temp_title=substr(strrchr($list[$i], ">"), 1 ); $temp_title=preg_replace( "@\@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( "@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( ""; }else{ $temp_title=$list[$i]; $temp_title=preg_replace( "@\@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( ""; } //获取的数据正确 if( !emptyempty( $temp_link ) && !emptyempty( $temp_title) ) { if( 0 == strpos( $tempc[2][0], "/" ) ) { preg_match( "@http://(.*?)/@i", $this->URL, $url ); $temp_link = substr( $url[0], 0, strlen( $url[0] ) - 1 ) . $temp_link; } $listContent[$i][0] = trim($temp_title); $listContent[$i][1] = $temp_link; } } return $listContent; } /** * 得到正文中的图片路径信息 * @param $content 正文信息 * @return array 信息中图片路径的数组 */ function getImageList ( $content ) { preg_match_all( "/src=(\"|')(.*?)(\"|')/i", $content, $temp ); $imageList = $temp[2]; return array_unique($imageList); } /** * 下载图片时将页面中的路径替换成新的路径 * @param $content 需要替换路径的页面内容 * @return String 替换后的页面内容 */ function replaceImageParh ( $content ) { for ( $i = 0; $i ImageList ); $i++ ) { if($this->FileName[$i]){ $content = str_replace( $this->ImageList[$i], $this->imageURL.$this->FileName[$i], $content ); }else{ //$s=" /src=(\\\"|')".preg_quote($this->ImageList[$i])."(\\\"|')/i"; $content = str_replace($this->ImageList[$i], $GLOBALS[SET][webpath]."images/nopic.gif", $content ); } } return $content; } /** * 下载图片时读取图片文件后存储在相应路径 * @param $imageURL 需要读取的图片文件 * @return boolean 操作成功返回true */ function saveImage ( $imageURL ) { for ( $i = 0; $i saveFile( $imageURL[$i] ); if( !emptyempty( $fName ) ) { $filename[$i] = $fName; } } return $filename; } function saveFile( $fileName ) { $s_filename = basename( $fileName ); $ext_name = strtolower( strrchr( $s_filename, "." 
) ); if( ( ".jpg" && ".gif" && ".swf" ) != strtolower( $ext_name ) ) { return ""; } if( 0 == strpos( $fileName, "/" ) ) { preg_match( "@http://(.*?)/@i", $this->URL, $url ); $url = $url[0]; } if( 0 == strpos( $fileName, "." ) ) { $url = substr( $this->URL, 0, strrpos( $fileName, "/" ) ); } $contents = @file_get_contents( $url . $fileName ); $s_filename = time(). rand( 1000, 9999 ) . $ext_name; //file_put_contents( $this->saveImagePath.$s_filename, $contents ); $handle = @fopen ( $this->saveImagePath.$s_filename, "w" ); @fwrite( $handle, $contents ); @fclose($handle); if(filesize($this->saveImagePath.$s_filename)>3072){ return $s_filename; }else{ @unlink($this->saveImagePath.$s_filename); return ""; } } /** * 不下载图片则格式化其路径为绝对路径 * 不能格式化变态路径 Eg: ./../ or /./../ 一类的不过不影响结果 * @param $imageURL 需要读取的图片文件 * @return $filename 返回格式化的图片路径 */ function ToPath($imageURL) { $PathArray=parse_url($this->URL); $webpath=$PathArray[scheme]."://".$PathArray[host] ; $filepath=$PathArray[path] ; for ( $i = 0; $i ImageList ); $i++ ) { $content = str_replace( $this->ImageList[$i], $this->FileName[$i], $content ); } return $content; } function setURL ( $u ) { $this->URL = $u; return true; } function setStartFlag ( $s ) { $this->startFlag = $s; return true; } function setEndFlag ( $e ) { $this->endFlag = $e; return true; } function setSaveImagePath ( $p ) { $this->saveImagePath = $p; return true; } function setImageURL ( $i ) { $this->imageURL = $i; return true; } } ?>

本網站聲明

本文內容由網友自願投稿，版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容，請聯絡admin@php.cn

熱AI工具

Undresser.AI Undress

人工智慧驅動的應用程序，用於創建逼真的裸體照片

AI Clothes Remover

用於從照片中去除衣服的線上人工智慧工具。

Undress AI Tool

免費脫衣圖片

Clothoff.io

AI脫衣器

AI Hentai Generator

免費產生 AI 無盡。

熱工具

Notepad++ 7.3.1

好用且免費的程式碼編輯器

SublimeText3漢化版

中文版，非常好用

Zend Studio 13.0.1

強大的PHP整合開發環境

Dreamweaver CS6

視覺化網頁開發工具

SublimeText3 Mac版

神級程式碼編輯軟體(SublimeText3)

熱門話題

gmail信箱登陸入口在哪裡

7454

CakePHP 教程

1374

steam的賬戶名稱是什麼格式

win11激活密鑰永久

NYT Connections 提示和答案

Related knowledge

PHP日誌記錄：PHP日誌分析的最佳實踐 Mar 10, 2025 pm 02:32 PM

PHP日誌記錄對於監視和調試Web應用程序以及捕獲關鍵事件，錯誤和運行時行為至關重要。它為系統性能提供了寶貴的見解，有助於識別問題並支持更快的故障排除

在Laravel中使用Flash會話數據 Mar 12, 2025 pm 05:08 PM

Laravel 透過其直觀的 flash 方法簡化了臨時會話數據的處理。這非常適合在您的應用程序中顯示簡短的消息、警報或通知。默認情況下，數據僅保留到下一次請求： $request->

PHP中的cURL：如何在REST API中使用PHP cURL擴展 Mar 14, 2025 am 11:42 AM

PHP客戶端URL（curl）擴展是開發人員的強大工具，可以與遠程服務器和REST API無縫交互。通過利用Libcurl（備受尊敬的多協議文件傳輸庫），PHP curl促進了有效的執行

在Laravel測試中簡化HTTP響應模擬 Mar 12, 2025 pm 05:09 PM

Laravel 提供简洁的 HTTP 响应模拟语法，简化了 HTTP 交互测试。这种方法显著减少了代码冗余，同时使您的测试模拟更直观。基本实现提供了多种响应类型快捷方式： use Illuminate\Support\Facades\Http; Http::fake([ 'google.com' => 'Hello World', 'github.com' => ['foo' => 'bar'], 'forge.laravel.com' =>