Home > Backend Development > PHP Tutorial > PHP curl collects off-site content sample code

PHP curl collects off-site content sample code

WBOY
Release: 2016-07-25 08:54:22
Original
1204 people have browsed it
  1. //Initialize a cURL object
  2. $curl = curl_init();
  3. //Set the URL you need to crawl
  4. curl_setopt($curl, CURLOPT_URL, 'http://bbs.it-home.org');
  5. // Set header
  6. curl_setopt($curl, CURLOPT_HEADER, 1);
  7. // Tell cURL to return the result as a string instead of printing it to the screen.
  8. curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
  9. // Run cURL and request the web page
  10. $data = curl_exec($curl);
  11. // Close the URL request
  12. curl_close($curl);

3. Find key data through regular expression matching

//$data is the value returned by curl_exec, which is the target content collected

preg_match_all("/<li>(.*?)<\/li>/",$data, $out, PREG_SET_ORDER);
    foreach($out as $key => $value){
      //Here $value is an array: $value[0] holds the entire matched string and $value[1] holds the text captured by the group
    1. echo 'The whole sentence matched: '.$value[0].'
    2. ';
    3. echo 'Single matching: '.$value[1].'
    4. ';
    5. }
    6. Copy Code
    7. PHP curl tips 1. Timeout-related settings. Several timeouts can be set through curl_setopt($ch, opt, value), mainly: CURLOPT_TIMEOUT sets the maximum number of seconds cURL is allowed to execute. CURLOPT_TIMEOUT_MS sets the maximum number of milliseconds cURL is allowed to execute (added in cURL 7.16.2; available as of PHP 5.2.3). CURLOPT_CONNECTTIMEOUT sets the number of seconds to wait while trying to connect; if set to 0, it waits indefinitely. CURLOPT_CONNECTTIMEOUT_MS sets the number of milliseconds to wait while trying to connect; if set to 0, it waits indefinitely (added in cURL 7.16.2; available as of PHP 5.2.3). CURLOPT_DNS_CACHE_TIMEOUT sets how long DNS information is kept in memory; the default is 120 seconds.
    2. Submit data through post and retain cookies
  • //Simulate logging in to a Discuz forum with cURL (works with DZ7.0)

    !extension_loaded('curl') && die('The curl extension is not loaded.');

      $discuz_url = 'http://bbs.it-home.org';//Forum address

    1. $login_url = $discuz_url .'/logging.php?action=login';//Login page address
    2. $get_url = $discuz_url .'/my.php?item=threads'; //My post

    3. $post_fields = array( );

    4. //The following two items do not need to be modified
    5. $post_fields['loginfield'] = 'username';
    6. $post_fields['loginsubmit'] = 'true';
    7. //Username and password must be filled in
    8. $post_fields[ 'username'] = 'jbxue';
    9. $post_fields['password'] = '88888888';
    10. //Security question
    11. $post_fields['questionid'] = 0;
    12. $post_fields['answer'] = '';
    13. //@todo verification code
    14. $post_fields['seccodeverify'] = '';

    15. //Get form FORMHASH

    16. $ch = curl_init($login_url);
    17. curl_setopt($ch, CURLOPT_HEADER, 0);
    18. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    19. $contents = curl_exec($ch);
    20. curl_close($ch);
    21. preg_match('/<input\s*type="hidden"\s*name="formhash"\s*value="(.*?)"\s*\/>/i', $contents, $matches);
    22. if(!empty($matches)) {
    23. $formhash = $matches[1];
    24. } else {
    25. die('Not found the forumhash.');
    26. }

    27. //POST data, get COOKIE

    28. $cookie_file = dirname(__FILE__) . '/cookie.txt';
    29. //$cookie_file = tempnam('/tmp');
    30. $ch = curl_init($login_url);
    31. curl_setopt($ch, CURLOPT_HEADER, 0);
    32. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    33. curl_setopt($ch, CURLOPT_POST, 1);
    34. curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
    35. curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    36. curl_exec($ch);
    37. curl_close($ch);
    38. //Use the COOKIE obtained above to obtain the page content that needs to be logged in to view

    39. $ch = curl_init($get_url);
    40. curl_setopt($ch, CURLOPT_HEADER, 0);
    41. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
    42. curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    43. $contents = curl_exec($ch);
    44. curl_close($ch);

    45. var_dump($contents );

    46. Copy code


    source:php.cn
    Statement of this Website
    The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
    Popular Tutorials
    More>
    Latest Downloads
    More>
    Web Effects
    Website Source Code
    Website Materials
    Front End Template