Heim > php教程 > PHP源码 > Hauptteil

多线程 QQ 号码爬虫

PHP中文网
Freigeben: 2016-05-25 16:58:34
Original
1386 Leute haben es durchsucht

php代码

<?php
/*
Homepage: http://www.php.cn
*/
// The Worker, Stackable and Pool classes used below come from the pthreads
// extension; stop immediately with a readable message when it is not loaded.
if (!extension_loaded('pthreads')) {
    die('Please install pthreads');
}

// Snoopy is the HTTP client that Crawler::qzone() instantiates. It is a hard
// dependency, so fail here (require) rather than later on "class not found".
require_once('Snoopy.class.php');

/**
 * Worker used by the Pool created at the bottom of this script.
 *
 * It holds a static database handle that tasks can fetch through
 * getInstance(). The connection setup in run() is commented out, so in the
 * code as it stands the handle is never assigned and getInstance() returns
 * null.
 */
class CrawlerWorker extends Worker
{
    /** Database handle shared through this class; never assigned in the active code. */
    protected static $dbh;

    /**
     * Intentionally empty constructor.
     */
    public function __construct()
    {
    }

    /**
     * Worker start-up hook. The body is entirely commented out; the disabled
     * code below shows how the PDO connection was meant to be created.
     */
    public function run()
    {
        /*
        $dbhost = 'db.example.com';   // database server
        $dbuser = 'example.com';      // database user name
        $dbpw   = 'password';         // database password
        $dbname = 'example';          // database name

        self::$dbh = new PDO("mysql:host=$dbhost;port=3306;dbname=$dbname", $dbuser, $dbpw, array(
            PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES \'UTF8\'',
            PDO::MYSQL_ATTR_COMPRESS => true,
            PDO::ATTR_PERSISTENT => true
            )
        );
        */
    }

    /**
     * Returns the static database handle.
     *
     * @return mixed whatever is stored in self::$dbh (null unless the setup
     *               in run() is re-enabled)
     */
    protected function getInstance()
    {
        return self::$dbh;
    }
}

/**
 * Pool task that crawls Qzone visitor lists starting from one seed QQ number.
 *
 * run() starts a depth-limited recursion: the visitors of the seed account
 * are fetched, then the visitors of each of those accounts, and so on until
 * the level given by $depth is reached. Every fetched list is printed.
 */
class Crawler extends Stackable {
    /**
     * Recursion limit. The seed list is level 1; recursion() returns without
     * fetching once the current level is >= this value.
     */
    public $depth = 3;

    /** Seed QQ number passed to the constructor. */
    public $qq;

    /**
     * @param string|int $qq seed QQ number to start crawling from
     */
    public function __construct($qq) {
        $this->qq = $qq;
    }

    /**
     * Task entry point: crawl from the seed number and log PDO failures.
     */
    public function run() {
        try {
            // Database handle offered by the worker. It is null while the
            // connection setup in CrawlerWorker::run() stays commented out
            // and nothing is persisted yet.
            $dbh = $this->worker->getInstance();
            $this->recursion(array($this->qq));
        } catch (PDOException $e) {
            // Record which seed failed and why.
            $error = sprintf("%s,%s\n", $this->qq, $e->getMessage());
            file_put_contents("mobile_error.log", $error, FILE_APPEND);
        }
    }

    /**
     * Fetch and print the visitor list of every number in $qqs, then recurse
     * into each list until $depth is reached.
     *
     * The level is carried as an argument so that every branch of the
     * recursion has its own depth; a counter shared between branches would
     * stop all siblings as soon as the first branch hit the limit.
     *
     * @param array $qqs   QQ numbers to process at this level
     * @param int   $level level of the caller; 0 (the default) for the seed call
     * @return void
     */
    public function recursion($qqs, $level = 0) {
        $level++;
        printf("Level: %s\n", $level);

        if ($level >= $this->depth) {
            return;
        }

        foreach ($qqs as $uin) {
            // Random 10 ms - 1 s pause before each request to throttle the crawl.
            usleep(mt_rand(10000, 1000000));

            $lst = $this->qzone($uin);
            print_r($lst);
            $this->recursion($lst, $level);
        }
    }

    /**
     * Request the visitor list of one QQ number from m.qzone.com.
     *
     * @param string|int $qq account whose visitors are requested
     * @return array the "uin" values found under data.list in the JSON
     *               response; an empty array when the request fails or the
     *               response does not have that structure
     */
    public function qzone($qq) {
        // NOTE(review): g_tk, sid and t are hard-coded request values copied
        // into the URL; they look session-specific - confirm they are still valid.
        $url = 'http://m.qzone.com/mqz_get_visitor?g_tk=1191852101&res_mode=0&res_uin=' . urlencode($qq) . '&offset=0&count=100&page=1&format=json&t=1401762986882&sid=dODKVcYv6azjN87cxXQ5mao1xgakYjHg18c8aa5e0201%3D%3D';
        $snoopy = new Snoopy;

        // A proxy can be configured through $snoopy->proxy_host / proxy_port.

        // Browser identification and referer sent with the request.
        $snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
        $snoopy->referer = "http://m.qzone.com/";

        // Extra raw header.
        $snoopy->rawheaders["Pragma"] = "no-cache";

        // Redirect and link handling.
        $snoopy->maxredirs = 2;
        $snoopy->offsiteok = false;
        $snoopy->expandlinks = false;

        if (!$snoopy->fetchtext($url)) {
            print "Snoopy: error while fetching document: ".$snoopy->error."\n";
            // Always hand back an array so callers can iterate the result.
            return array();
        }

        $results = array();
        $tmp = json_decode($snoopy->results);

        // Accept only a JSON object that really carries data.list.
        if (!is_object($tmp) || !isset($tmp->data->list)) {
            return $results;
        }

        $list = $tmp->data->list;
        if (!is_array($list) && !is_object($list)) {
            return $results;
        }

        foreach ($list as $lst) {
            // Skip entries without a uin instead of raising a notice.
            if (isset($lst->uin)) {
                $results[] = $lst->uin;
            }
        }

        return $results;
    }
}

// Pool of CrawlerWorker threads, sized 100, with no worker constructor arguments.
$pool = new Pool(100, \CrawlerWorker::class, []);

// To crawl a whole numeric range instead of fixed seeds:
// foreach (range(1000, 100000) as $number) {
//     $pool->submit(new Crawler($number));
// }

// Seed accounts. Submit one Crawler per additional QQ number, and so on:
// $pool->submit(new Crawler('nnn'));
$pool->submit(new Crawler('13721218'));
$pool->submit(new Crawler('291379'));

// Shut the workers down once the submitted tasks are done.
$pool->shutdown();
?>
Nach dem Login kopieren
Verwandte Etiketten:
Quelle:php.cn
Erklärung dieser Website
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn
Beliebte Empfehlungen
Beliebte Tutorials
Mehr>
Neueste Downloads
Mehr>
Web-Effekte
Quellcode der Website
Website-Materialien
Frontend-Vorlage