Sharing a piece of code from a web-scraping (collection) program

WBOY
Release: 2016-07-25 09:11:20
Original
947 people have browsed it
看到总有人问 curl 的问题，这两天没事写了个采集论坛的小程序。里面包括了模拟登录、获取页面源代码、正则匹配结果等部分，希望对大家有用。
  1. set_time_limit(0);
  2. // Path of the file in which cookies are stored
  3. $cookie_jar = '/tmp/cookie.tmp';
  4. /* Functions ------------------------------------------------------------------------------------------------------------*/
  /**
   * Send a POST request with cURL and return the response body.
   *
   * The same cookie file is used for both reading (CURLOPT_COOKIEFILE) and
   * writing (CURLOPT_COOKIEJAR), so cookies received by one call are
   * available to later calls that pass the same $cookie_jar.
   *
   * The port is deliberately NOT forced: it is taken from $url (or from the
   * scheme default), so https:// URLs and URLs with an explicit port work.
   *
   * @param string       $url        Target URL.
   * @param string|array $postfields POST data, handed to CURLOPT_POSTFIELDS as-is.
   * @param string       $cookie_jar Path of the cookie file to read from and write to.
   * @param string       $referer    Value sent in the Referer header.
   *
   * @return string|false Response body (headers excluded), or false if the transfer failed.
   */
  function request($url, $postfields, $cookie_jar, $referer)
  {
      $ch = curl_init();

      $options = array(
          CURLOPT_URL            => $url,
          CURLOPT_HEADER         => 0,           // do not include response headers in the result
          CURLOPT_NOBODY         => 0,           // do fetch the response body
          CURLOPT_POST           => 1,
          CURLOPT_POSTFIELDS     => $postfields,
          CURLOPT_RETURNTRANSFER => 1,           // return the body instead of printing it
          CURLOPT_FOLLOWLOCATION => 1,           // follow HTTP redirects
          CURLOPT_COOKIEJAR      => $cookie_jar,
          CURLOPT_COOKIEFILE     => $cookie_jar,
          CURLOPT_REFERER        => $referer,
      );
      curl_setopt_array($ch, $options);

      $code = curl_exec($ch);
      curl_close($ch);

      return $code;
  }
  25. //获取帖子列表
  26. function getThreadsList($code){
  27. preg_match_all('/