Home > php教程 > PHP源码 > body text

多线程 QQ 号码爬虫

PHP中文网
Release: 2016-05-25 16:58:34
Original
1339 people have browsed it

php代码

<?php
/*
Homepage: http://www.php.cn
*/
// Abort early when the pthreads extension is missing: the Worker, Stackable
// and Pool classes used below are provided by it.
if (!extension_loaded('pthreads')) {
	die('Please install pthreads');
}

// Snoopy is the HTTP client class used by Crawler::qzone().
include_once('Snoopy.class.php');

/**
 * Worker class handed to the Pool; Crawler tasks run on instances of it and
 * ask it for a database handle through getInstance().
 *
 * The connection code in run() is disabled, so the handle is currently never
 * assigned and getInstance() returns the unset static property.
 */
class CrawlerWorker extends Worker
{
	/** PDO handle intended to be created in run(); unset while that code is disabled. */
	protected static $dbh;

	/**
	 * Nothing to initialise at construction time.
	 */
	public function __construct()
	{
	}

	/**
	 * Worker start-up hook. The body only holds the disabled connection setup.
	 */
	public function run()
	{
		/*
		$dbhost = 'db.example.com';   // database server
		$dbuser = 'example.com';      // database user name
		$dbpw   = 'password';         // database password
		$dbname = 'example';          // database name

		self::$dbh = new PDO("mysql:host=$dbhost;port=3306;dbname=$dbname", $dbuser, $dbpw, array(
			PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES \'UTF8\'',
			PDO::MYSQL_ATTR_COMPRESS => true,
			PDO::ATTR_PERSISTENT => true
			)
		);
		*/
	}

	/**
	 * Returns the handle stored in self::$dbh.
	 *
	 * NOTE(review): Crawler::run() calls this through $this->worker although it
	 * is declared protected; that presumably relies on how the installed
	 * pthreads version treats method modifiers -- confirm before changing it.
	 *
	 * @return PDO|null
	 */
	protected function getInstance()
	{
		return self::$dbh;
	}
}

/**
 * Task submitted to the Pool. Starting from one seed QQ number it fetches the
 * Qzone visitor list of that account, then the visitor lists of those
 * visitors, and so on, until $depth levels have been reached.
 *
 * Every fetched list is echoed with print_r(); nothing is persisted yet.
 */
class Crawler extends Stackable {
	/** Level at which recursion stops; lists received at this level are not expanded. */
	public $depth = 3;
	/** Seed QQ number this task starts from. */
	public $qq;

	/**
	 * @param int|string $qq Seed QQ number.
	 */
	public function __construct($qq) {
		$this->qq = $qq;
	}

	/**
	 * Task entry point: crawls outward from the seed number.
	 *
	 * A PDOException is appended to mobile_error.log as "<seed qq>,<message>".
	 */
	public function run() {
		try {
			// NOTE(review): the handle is fetched but not used yet; it is the
			// hook for storing results once the worker's connection is enabled.
			$dbh = $this->worker->getInstance();
			$this->recursion(array($this->qq));
		}
		catch (PDOException $e) {
			// The original logged undefined $mobile/$id here; log the seed
			// number and the actual failure instead.
			$error = sprintf("%s,%s\n", $this->qq, $e->getMessage());
			file_put_contents("mobile_error.log", $error, FILE_APPEND);
		}
	}

	/**
	 * Fetches the visitor list of every number in $qqs and recurses into each
	 * returned list.
	 *
	 * The level is passed down explicitly so each branch of the crawl is
	 * limited to the same depth. (A shared static counter never unwinds when a
	 * call returns, which cut every branch after the first one short.)
	 *
	 * @param array $qqs   QQ numbers to expand.
	 * @param int   $level Level of this call; the initial call is level 1.
	 * @return void
	 */
	public function recursion($qqs, $level = 1) {
		printf("Level: %s\n", $level);
		// Random 10 ms - 1 s pause between requests.
		usleep(mt_rand(10000, 1000000));

		if ($level >= $this->depth || !is_array($qqs)) {
			return;
		}

		foreach ($qqs as $uin) {
			$lst = $this->qzone($uin);
			print_r($lst);
			$this->recursion($lst, $level + 1);
		}
	}

	/**
	 * Requests the visitor list of one account from m.qzone.com.
	 *
	 * @param int|string $qq Account whose visitors are requested.
	 * @return array The uin value of each visitor entry; empty when the fetch
	 *               fails or the response lacks a data->list array.
	 */
	public function qzone($qq) {
		// NOTE(review): g_tk and sid look like session-bound tokens captured
		// from a browser session -- confirm they are still valid and consider
		// moving them to configuration.
		$url = 'http://m.qzone.com/mqz_get_visitor?g_tk=1191852101&res_mode=0&res_uin='
			. rawurlencode((string) $qq)
			. '&offset=0&count=100&page=1&format=json&t=1401762986882&sid=dODKVcYv6azjN87cxXQ5mao1xgakYjHg18c8aa5e0201%3D%3D';

		$snoopy = new Snoopy;

		// A proxy can be configured through $snoopy->proxy_host / $snoopy->proxy_port.

		// Browser identification and referer sent with the request.
		$snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
		$snoopy->referer = "http://m.qzone.com/";

		// Extra raw request header.
		$snoopy->rawheaders["Pragma"] = "no-cache";

		// Redirect and link-handling settings.
		$snoopy->maxredirs = 2;
		$snoopy->offsiteok = false;
		$snoopy->expandlinks = false;

		if (!$snoopy->fetchtext($url)) {
			print "Snoopy: error while fetching document: ".$snoopy->error."\n";
			// Always hand back an array so recursion() can iterate the result.
			return array();
		}

		$results = array();
		$tmp = json_decode($snoopy->results);

		// Only walk the list when the decoded response really has data->list.
		if (is_object($tmp) && isset($tmp->data->list) && is_array($tmp->data->list)) {
			foreach ($tmp->data->list as $lst) {
				if (isset($lst->uin)) {
					$results[] = $lst->uin;
				}
			}
		}

		return $results;
	}
}

// Pool of up to 100 CrawlerWorker threads; the empty array is the argument
// list passed to each worker's constructor.
$pool = new Pool(100, \CrawlerWorker::class, []);

// Alternative: seed the crawl with a whole numeric range.
#foreach (range(1000, 100000) as $number) {
#	$pool->submit(new Crawler($number));
#}

// Seed QQ numbers. Add further seeds with more submit() calls, e.g.
// $pool->submit(new Crawler('xxx')); ... and so on.
$pool->submit(new Crawler('13721218'));
$pool->submit(new Crawler('291379'));

// Shut the workers down once the submitted tasks have been executed.
$pool->shutdown();
?>
Copy after login
Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Recommendations
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template
About us Disclaimer Sitemap
php.cn: Public-welfare online PHP training, helping PHP learners grow quickly!