An example of curl batch processing implemented in php php curl download file php curl.dll download php curl simulated login

WBOY
Release: 2016-07-29 08:52:20
Original
1347 people have browsed it

curl is an open-source file transfer tool that uses URL syntax and runs from the command line; PHP exposes the same functionality through its cURL extension.

This article shows an example of batch (parallel) request processing with curl in PHP, using the curl_multi_* functions.

The code is as follows:

<?php
header("Content-Type:text/html;charset=utf8");

/**
 * Drive a curl multi handle until every attached transfer has finished
 * or curl reports an error.
 *
 * @param resource $mh Multi handle with the easy handles already attached.
 * @return int The last status code returned by curl_multi_exec().
 */
function run_multi_handle($mh)
{
    $active = null;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($mh, $active);
        } while ($status == CURLM_CALL_MULTI_PERFORM);

        if ($status != CURLM_OK || !$active) {
            break;
        }

        // curl_multi_select() can return -1 right away instead of waiting.
        // Without a pause the loop would spin at full CPU, so back off
        // briefly before polling again.
        if (curl_multi_select($mh, 1.0) == -1) {
            usleep(10000);
        }
    } while (true);

    return $status;
}

/**
 * Download several URLs in parallel.
 *
 * @param array $urls List of URLs to fetch.
 * @return array Response bodies in the same order as $urls; a transfer
 *               that produced no content yields an empty string.
 */
function fetch_all(array $urls)
{
    $mh = curl_multi_init();
    $handles = array();

    foreach ($urls as $url) {
        $ch = curl_init();
        curl_setopt_array($ch, array(
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => 0,
            CURLOPT_RETURNTRANSFER => 1,
            // Bound each transfer so one unresponsive host cannot stall
            // the whole batch.
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT        => 30,
        ));
        curl_multi_add_handle($mh, $ch);
        $handles[] = $ch;
    }

    run_multi_handle($mh);

    $bodies = array();
    foreach ($handles as $ch) {
        $bodies[] = (string) curl_multi_getcontent($ch);
        curl_multi_remove_handle($mh, $ch);
    }
    curl_multi_close($mh);

    return $bodies;
}

/* Step 1: fetch two pages and collect all of their <a> tags. */
$text = implode('', fetch_all(array(
    'http://www.sina.com.cn',
    'http://www.baidu.com/',
)));

// Find every link; group 1 is the href value, group 2 the link text.
// [^>] keeps the match inside a single tag, /i accepts upper-case markup
// and /s lets the link text span several lines.
$matches = array();
preg_match_all(
    "/<a\\b[^>]*?\\bhref\\s*=\\s*['\"]([^'\"]*)['\"][^>]*>(.*?)<\\/a>/is",
    $text,
    $matches
);
$hrefs = empty($matches[1]) ? array() : $matches[1];

/* Step 2: fetch the linked pages and extract their <title> tags. */

// Only absolute http(s) URLs can be fetched; anchors, javascript: links
// and relative paths are skipped. At most 100 pages are processed.
$links = array_slice(preg_grep('#^https?://#i', $hrefs), 0, 100);

$mtext = implode('', fetch_all($links));

$mmatches = array();
preg_match_all("/<title>(.*?)<\\/title>/is", $mtext, $mmatches);
if (empty($mmatches[1])) {
    $mmatches[1] = array();
}

// Encoding conversion: the page is declared as UTF-8, so every title that
// is not already UTF-8 is converted. UTF-8 is listed first so that valid
// UTF-8 titles are recognised as such and left untouched.
// NOTE(review): the original list also named 'UNICODE'; it is left out here
// because it is not a byte-oriented charset like the others - confirm that
// no such pages are expected.
$candidates = array('UTF-8', 'GB2312', 'GBK', 'BIG5', 'GB18030', 'CP936');
foreach ($mmatches[1] as $key => $val) {
    $encoding = mb_detect_encoding($val, $candidates, true);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $mmatches[1][$key] = mb_convert_encoding($val, 'UTF-8', $encoding);
    }
}

// Print the collected titles.
var_dump($mmatches[1]);
Copy after login

The above is an example of curl batch processing implemented in PHP. I hope it is helpful to readers who are interested in PHP.

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template