PHP采集类
PHP代码
URL ); return $pageContent; } /** * 根据标记得到列表段 * @param $content 页面源数据 * @return String 列表段内容 */ function getContentPiece ( $content ) { $content = $this->getContent( $content, $this->startFlag, $this->endFlag ); if(!$content) $content=$this->cut ($content, $this->startFlag, $this->endFlag ); return $content; } /** * 得到一个字符串中的某一部分 * @param $sourceStr 源数据 * @param $startStr 分离部分的开始标记 * @param $endStart 分离部分的结束标记 * @return boolean 操作成功返回true */ function getContent ( $sourceStr, $startStr, $endStart ) { $s = preg_quote( decode( $startStr ) ); $e = preg_quote( decode( $endStart ) ); $s = str_replace( " ", "[[:space:]]", $s ); $e = str_replace( " ", "[[:space:]]", $e ); $s = str_replace( "\r\n", "[[:cntrl:]]", $s ); $e = str_replace( "\r\n", "[[:cntrl:]]", $e ); preg_match_all( "@" . $s . "(.*?)". $e ."@is", $sourceStr, $tpl ); $content = $tpl[1]; $content = implode( "", $content ); return $content; } function cut ( $sourceStr, $startStr, $endStr ) { return cut( $sourceStr ,decode( $startStr ) ,decode( $endStr) ); } /** * 得到只含有连接和内容的列表数组 * @param $sList 页面列表源数据 * @return array 列表段内容 */ function getSourceList ( $sList ) { preg_match_all( "//i", $sList, $list ); $list = $list[0]; //foreach($list as $l) echo $l; if(!$list || !is_array($list)){ return $this->getSourceListExtend($sList); }else{ return $this->getList ( $list ); } } function getSourceListExtend($sList) { $content=explode("",$sList); for($i=0;$i"; //title if(eregi(">",$list[$i])){ $temp_title=substr(strrchr($list[$i], ">"), 1 ); $temp_title=preg_replace( "@\@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( "@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( ""; }else{ $temp_title=$list[$i]; $temp_title=preg_replace( "@\@is","",$temp_title); $temp_title=str_replace( ">","",$temp_title); $temp_title=str_replace( ""; } //获取的数据正确 if( !emptyempty( $temp_link ) && !emptyempty( $temp_title) ) { if( 0 == strpos( $tempc[2][0], "/" ) ) { 
preg_match( "@http://(.*?)/@i", $this->URL, $url ); $temp_link = substr( $url[0], 0, strlen( $url[0] ) - 1 ) . $temp_link; } $listContent[$i][0] = trim($temp_title); $listContent[$i][1] = $temp_link; } } return $listContent; } /** * 得到正文中的图片路径信息 * @param $content 正文信息 * @return array 信息中图片路径的数组 */ function getImageList ( $content ) { preg_match_all( "/src=(\"|')(.*?)(\"|')/i", $content, $temp ); $imageList = $temp[2]; return array_unique($imageList); } /** * 下载图片时将页面中的路径替换成新的路径 * @param $content 需要替换路径的页面内容 * @return String 替换后的页面内容 */ function replaceImageParh ( $content ) { for ( $i = 0; $i ImageList ); $i++ ) { if($this->FileName[$i]){ $content = str_replace( $this->ImageList[$i], $this->imageURL.$this->FileName[$i], $content ); }else{ //$s=" /src=(\\\"|')".preg_quote($this->ImageList[$i])."(\\\"|')/i"; $content = str_replace($this->ImageList[$i], $GLOBALS[SET][webpath]."images/nopic.gif", $content ); } } return $content; } /** * 下载图片时读取图片文件后存储在相应路径 * @param $imageURL 需要读取的图片文件 * @return boolean 操作成功返回true */ function saveImage ( $imageURL ) { for ( $i = 0; $i saveFile( $imageURL[$i] ); if( !emptyempty( $fName ) ) { $filename[$i] = $fName; } } return $filename; } function saveFile( $fileName ) { $s_filename = basename( $fileName ); $ext_name = strtolower( strrchr( $s_filename, "." ) ); if( ( ".jpg" && ".gif" && ".swf" ) != strtolower( $ext_name ) ) { return ""; } if( 0 == strpos( $fileName, "/" ) ) { preg_match( "@http://(.*?)/@i", $this->URL, $url ); $url = $url[0]; } if( 0 == strpos( $fileName, "." ) ) { $url = substr( $this->URL, 0, strrpos( $fileName, "/" ) ); } $contents = @file_get_contents( $url . $fileName ); $s_filename = time(). rand( 1000, 9999 ) . 
$ext_name; //file_put_contents( $this->saveImagePath.$s_filename, $contents ); $handle = @fopen ( $this->saveImagePath.$s_filename, "w" ); @fwrite( $handle, $contents ); @fclose($handle); if(filesize($this->saveImagePath.$s_filename)>3072){ return $s_filename; }else{ @unlink($this->saveImagePath.$s_filename); return ""; } } /** * 不下载图片则格式化其路径为绝对路径 * 不能格式化变态路径 Eg: ./../ or /./../ 一类的 不过不影响结果 * @param $imageURL 需要读取的图片文件 * @return $filename 返回格式化的图片路径 */ function ToPath($imageURL) { $PathArray=parse_url($this->URL); $webpath=$PathArray[scheme]."://".$PathArray[host] ; $filepath=$PathArray[path] ; for ( $i = 0; $i ImageList ); $i++ ) { $content = str_replace( $this->ImageList[$i], $this->FileName[$i], $content ); } return $content; } function setURL ( $u ) { $this->URL = $u; return true; } function setStartFlag ( $s ) { $this->startFlag = $s; return true; } function setEndFlag ( $e ) { $this->endFlag = $e; return true; } function setSaveImagePath ( $p ) { $this->saveImagePath = $p; return true; } function setImageURL ( $i ) { $this->imageURL = $i; return true; } } ?>

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article

Hot Tools

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
Top-tier code editing software (SublimeText3)

Hot Topics

Laravel simplifies handling temporary session data using its intuitive flash methods. This is perfect for displaying brief messages, alerts, or notifications within your application. Data persists only for the subsequent request by default: $request-

This is the second and final part of the series on building a React application with a Laravel back-end. In the first part of the series, we created a RESTful API using Laravel for a basic product-listing application. In this tutorial, we will be dev

The PHP Client URL (cURL) extension is a powerful tool for developers, enabling seamless interaction with remote servers and REST APIs. By leveraging libcurl, a well-respected multi-protocol file transfer library, PHP cURL facilitates efficient execution of various network protocols, including HTTP, HTTPS, and FTP. This extension offers granular control over HTTP requests, supports multiple concurrent operations, and provides built-in security features.

Laravel provides concise HTTP response simulation syntax, simplifying HTTP interaction testing. This approach significantly reduces code redundancy while making your test simulation more intuitive. The basic implementation provides a variety of response type shortcuts: use Illuminate\Support\Facades\Http; Http::fake([ 'google.com' => 'Hello World', 'github.com' => ['foo' => 'bar'], 'forge.laravel.com' =>

Do you want to provide real-time, instant solutions to your customers' most pressing problems? Live chat lets you have real-time conversations with customers and resolve their problems instantly. It allows you to provide faster service to your custom

In this article, we're going to explore the notification system in the Laravel web framework. The notification system in Laravel allows you to send notifications to users over different channels. Today, we'll discuss how you can send notifications ov

This article discusses late static binding (LSB) in PHP, introduced in PHP 5.3, which allows runtime resolution of static method calls for more flexible inheritance. Main issue: LSB vs. traditional polymorphism; LSB's practical applications and potential perfo

PHP logging is essential for monitoring and debugging web applications, as well as capturing critical events, errors, and runtime behavior. It provides valuable insights into system performance, helps identify issues, and supports faster troubleshoot
