-
// Fetch a page with cURL, then pull the list items out of it with a regular expression.

// Initialize a cURL handle.
$curl = curl_init();
// Set the URL to crawl.
curl_setopt($curl, CURLOPT_URL, 'http://bbs.it-home.org');
// Include the response headers in the returned data.
curl_setopt($curl, CURLOPT_HEADER, 1);
// Return the response from curl_exec() as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// Run cURL and request the web page.
$data = curl_exec($curl);
if ($data === false) {
    // curl_exec() returns false on failure; read the reason before closing the handle.
    $error = curl_error($curl);
    curl_close($curl);
    die('cURL request failed: ' . $error);
}
// Close the URL request.
curl_close($curl);

// 3. Find key data through regular matching
// $data is the value returned by curl_exec(), i.e. the collected target content.
// NOTE(review): the tags in this pattern were stripped from the published text (only " li>"
// survived); <li>...</li> is a reconstruction -- confirm against the target page.
preg_match_all('/<li>(.*?)<\/li>/', $data, $out, PREG_SET_ORDER);
foreach ($out as $value) {
    // With PREG_SET_ORDER each $value is one match: [0] is the whole matched text,
    // [1] is the text captured by the first group.
    echo 'The whole sentence matched: ' . $value[0] . "\n";
    echo 'Single matching: ' . $value[1] . "\n";
}
-
-
- Copy Code
-
-
- PHP curl tips
1. Timeout related settings
Several timeout settings can be configured with curl_setopt($ch, $option, $value), mainly:
CURLOPT_TIMEOUT sets the maximum number of seconds cURL is allowed to execute.
CURLOPT_TIMEOUT_MS sets the maximum number of milliseconds cURL is allowed to execute. (Added in cURL 7.16.2. Available as of PHP 5.2.3.)
CURLOPT_CONNECTTIMEOUT sets the number of seconds to wait while trying to connect. If set to 0, it will wait indefinitely.
CURLOPT_CONNECTTIMEOUT_MS The time, in milliseconds, to wait for a connection attempt. If set to 0, wait infinitely. Added in cURL 7.16.2. Available starting with PHP 5.2.3.
CURLOPT_DNS_CACHE_TIMEOUT sets the time to save DNS information in memory, the default is 120 seconds.
2. Submit data through post and retain cookies
// Curl simulated login to a Discuz forum, suitable for DZ7.0.
// This section only prepares the URLs and the login form fields.
if (!extension_loaded('curl')) {
    die('The curl extension is not loaded.');
}

$discuz_url = 'http://bbs.it-home.org'; // Forum address
$login_url = $discuz_url . '/logging.php?action=login'; // Login page address
$get_url = $discuz_url . '/my.php?item=threads'; // My posts

$post_fields = array();
// The following two items do not need to be modified
$post_fields['loginfield'] = 'username';
$post_fields['loginsubmit'] = 'true';
// Username and password must be filled in
$post_fields['username'] = 'jbxue';
$post_fields['password'] = '88888888';
// Security question
$post_fields['questionid'] = 0;
$post_fields['answer'] = '';
// @todo verification code
$post_fields['seccoverify'] = '';
// Get the form FORMHASH: download the login page and read the hash out of its HTML.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$contents = curl_exec($ch);
if ($contents === false) {
    // curl_exec() returns false on failure; read the reason before closing the handle.
    $error = curl_error($ch);
    curl_close($ch);
    die('Failed to load the login page: ' . $error);
}
curl_close($ch);

// NOTE(review): the published pattern was reduced to '//i', which matches everything and
// captures nothing. This pattern assumes the login form carries
// <input type="hidden" name="formhash" value="..." /> -- confirm against the real page.
$formhash_pattern = '/<input\s*type="hidden"\s*name="formhash"\s*value="(.*?)"\s*\/>/i';
if (preg_match($formhash_pattern, $contents, $matches) === 1) {
    $formhash = $matches[1];
} else {
    die('Not found the forumhash.');
}
// Send the hash along with the other login fields; presumably the forum validates it
// on submit -- verify against the target installation.
$post_fields['formhash'] = $formhash;
// POST data, get COOKIE: submit the login form and store the returned cookies in a file.
$cookie_file = dirname(__FILE__) . '/cookie.txt';
// Alternative location: $cookie_file = tempnam(sys_get_temp_dir(), 'cookie');
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
// Keep the login response in the return value instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
// File that cURL saves the received cookies to once the handle is released.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
if (curl_exec($ch) === false) {
    $error = curl_error($ch);
    curl_close($ch);
    die('Login request failed: ' . $error);
}
curl_close($ch);
// As of PHP 8.0 curl_close() has no effect, so drop the handle explicitly to make sure
// the cookie jar is written before it is read back.
unset($ch);
// Use the COOKIE obtained above to fetch a page that can only be viewed when logged in.
$ch = curl_init($get_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
// Return the page as a string; with 0 here curl_exec() prints the page itself and
// returns true, so $contents would hold a boolean rather than the content.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Send the cookies stored in the cookie file with this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$contents = curl_exec($ch);
curl_close($ch);
var_dump($contents);
-
-
-
- Copy code
-
-
-
|