PHP에서 구현된 컬 일괄 처리의 예 php 컬 다운로드 파일 php 컬.dll 다운로드 php 컬 시뮬레이션 로그인

WBOY
풀어 주다: 2016-07-29 08:52:20
원래의
1346명이 탐색했습니다.

curl은 URL 구문을 사용하여 명령줄 모드에서 작동하는 오픈 소스 파일 전송 도구입니다.

이 문서에서는 PHP에서 컬 일괄 처리의 예를 구현합니다.

코드는 다음과 같습니다.

<span>  1</span><span>header</span>("Content-Type:text/html;charset=utf8"<span>);
</span><span>  2</span><span>  3</span><span>/*</span><span> 先获取两个页面的所有a标签 </span><span>*/</span><span>  4</span><span>//</span><span> 初始化两个简单处理句柄</span><span>  5</span><span>$ch1</span> =<span> curl_init();
</span><span>  6</span><span>$ch2</span> =<span> curl_init();
</span><span>  7</span> curl_setopt_array(<span>$ch1</span>,<span>array</span><span>(
</span><span>  8</span>     CURLOPT_URL => 'http://www.sina.com.cn',
<span>  9</span>     CURLOPT_HEADER => 0,
<span> 10</span>     CURLOPT_RETURNTRANSFER => 1,
<span> 11</span><span>));
</span><span> 12</span> curl_setopt_array(<span>$ch2</span>,<span>array</span><span>(
</span><span> 13</span>     CURLOPT_URL => 'http://www.baidu.com/',
<span> 14</span>     CURLOPT_HEADER => 0,
<span> 15</span>     CURLOPT_RETURNTRANSFER => 1,
<span> 16</span><span>));
</span><span> 17</span><span> 18</span><span>//</span><span> 初始化批处理句柄,并添加简单处理句柄</span><span> 19</span><span>$mh</span> =<span> curl_multi_init();
</span><span> 20</span> curl_multi_add_handle(<span>$mh</span>,<span>$ch1</span><span>);
</span><span> 21</span> curl_multi_add_handle(<span>$mh</span>,<span>$ch2</span><span>);
</span><span> 22</span><span> 23</span><span>//</span><span> 初始化执行状态</span><span> 24</span><span>$state</span> = <span>null</span><span>;
</span><span> 25</span><span> 26</span><span>//</span><span> 执行批处理</span><span> 27</span><span>do</span><span>{
</span><span> 28</span><span>$mc</span> = curl_multi_exec(<span>$mh</span>,<span>$state</span><span>);    
</span><span> 29</span> }<span>while</span>(<span>$mc</span> ==<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 30</span><span>while</span>(<span>$mc</span> == CURLM_OK && <span>$state</span><span>) {
</span><span> 31</span><span>while</span> (curl_multi_exec(<span>$mh</span>, <span>$state</span>) ===<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 32</span><span>//</span><span> 经过实验,发现curl_multi_select($mh)总是返回-1,意味着一下代码不会执行</span><span> 33</span><span>if</span>(curl_multi_select(<span>$mh</span>) != -1<span>) {
</span><span> 34</span><span>do</span><span>{
</span><span> 35</span><span>$mc</span> = curl_multi_exec(<span>$mh</span>,<span>$state</span><span>);
</span><span> 36</span>         }<span>while</span>(<span>$mc</span> ==<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 37</span><span>    }    
</span><span> 38</span><span>}
</span><span> 39</span><span> 40</span><span>//</span><span> 获取内容</span><span> 41</span><span>$text</span>  = curl_multi_getcontent(<span>$ch1</span><span>);
</span><span> 42</span><span>$text</span> .= curl_multi_getcontent(<span>$ch2</span><span>);
</span><span> 43</span><span> 44</span><span>//</span><span> 找到页面中所有的a标签,保存到$matches</span><span> 45</span><span>$matches</span> = <span>null</span><span>;
</span><span> 46</span><span>preg_match_all</span>("/<a.*?href\s*?=\s*?[\'\"](.*?)[\'\"].*?>(.*?)<\/a>/",<span>$text</span>,<span>$matches</span><span>);
</span><span> 47</span><span> 48</span><span>//</span><span> 关闭各个句柄</span><span> 49</span> curl_multi_remove_handle(<span>$mh</span>,<span>$ch1</span><span>);
</span><span> 50</span> curl_multi_remove_handle(<span>$mh</span>,<span>$ch2</span><span>);
</span><span> 51</span> curl_multi_close(<span>$mh</span><span>);
</span><span> 52</span><span> 53</span><span>/*</span><span>在找到的连接中继续查找title标签 </span><span>*/</span><span> 54</span><span> 55</span><span>$handle</span> = <span>array</span>(); <span>//</span><span> 存储简单处理句柄的数组</span><span> 56</span><span>$mhandle</span> = curl_multi_init(); <span>//</span><span>批处理句柄
</span><span> 57</span><span>// 处理100个页面</span><span> 58</span><span>foreach</span>(<span>array_slice</span>(<span>$matches</span>[1],0,100) <span>as</span><span>$href</span><span>) {
</span><span> 59</span><span>$tmp_h</span> =<span> curl_init();
</span><span> 60</span>     curl_setopt_array(<span>$tmp_h</span>,<span>array</span><span>(
</span><span> 61</span>         CURLOPT_URL => <span>$href</span>,
<span> 62</span>         CURLOPT_HEADER => 0,
<span> 63</span>         CURLOPT_RETURNTRANSFER => 1,
<span> 64</span><span>    ));
</span><span> 65</span>     curl_multi_add_handle(<span>$mhandle</span>,<span>$tmp_h</span><span>);
</span><span> 66</span><span>$handle</span>[] = <span>$tmp_h</span><span>;
</span><span> 67</span><span>}
</span><span> 68</span><span>do</span><span>{
</span><span> 69</span><span>$mrc</span> = curl_multi_exec(<span>$mhandle</span>,<span>$active</span><span>);
</span><span> 70</span> }<span>while</span>(<span>$mrc</span> ==<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 71</span><span>while</span>(<span>$mrc</span> == CURLM_OK && <span>$active</span><span>) {
</span><span> 72</span><span>while</span>(curl_multi_exec(<span>$mhandle</span>,<span>$active</span>) ==<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 73</span><span>if</span>(curl_multi_select(<span>$mhandle</span>) != -1<span>) {
</span><span> 74</span><span>do</span><span>{
</span><span> 75</span><span>$mrc</span> = curl_multi_exec(<span>$mhandle</span>,<span>$active</span><span>);
</span><span> 76</span>         }<span>while</span>(<span>$mrc</span> ==<span> CURLM_CALL_MULTI_PERFORM);
</span><span> 77</span><span>    }
</span><span> 78</span><span>}
</span><span> 79</span><span> 80</span><span>//</span><span> 获取这些页面的内容</span><span> 81</span><span>$mtext</span> = <span>null</span><span>;
</span><span> 82</span><span>foreach</span>(<span>$handle</span><span>as</span><span>$tmp_h</span><span>) {
</span><span> 83</span><span>$mtext</span> .= curl_multi_getcontent(<span>$tmp_h</span><span>);
</span><span> 84</span>     curl_multi_remove_handle(<span>$mhandle</span>, <span>$tmp_h</span><span>);
</span><span> 85</span><span>}
</span><span> 86</span><span>$mmatches</span> = <span>array</span><span>();
</span><span> 87</span><span>preg_match_all</span>("/<title>(.*?)<\/title>/",<span>$mtext</span>, <span>$mmatches</span><span>);
</span><span> 88</span><span> 89</span><span>//</span><span> 编码转换</span><span> 90</span> mb_detect_order('GB2312,GBK,BIG5,GB18030,UNICODE ,CP936'<span>);
</span><span> 91</span><span>foreach</span>(<span>$mmatches</span>[1] <span>as</span><span>$key</span> => <span>$val</span><span>) {
</span><span> 92</span><span>$encoding</span> = mb_detect_encoding(<span>$val</span><span>);
</span><span> 93</span><span>if</span>(<span>$encoding</span> != 'UTF-8' && <span>$encoding</span> != 'CP936' && <span>$encoding</span> != 'GB18030' && <span>$encoding</span> !=''<span>) {
</span><span> 94</span><span>$mmatches</span>[1][<span>$key</span>] = <span>iconv</span>(<span>$encoding</span>,'UTF-8//IGNORE',<span>$val</span><span>);
</span><span> 95</span><span>    }
</span><span> 96</span><span>}
</span><span> 97</span><span> 98</span><span>//</span><span> 打印title信息</span><span> 99</span><span>var_dump</span>(<span>$mmatches</span>[1<span>]);
</span><span>100</span><span>101</span><span>//</span><span> 关闭批处理句柄</span><span>102</span> curl_multi_close(<span>$mhandle</span>);
로그인 후 복사

위 내용은 컬과 PHP 내용을 포함하여 PHP로 구현된 컬 일괄 처리의 예를 소개하고 있으며, PHP 튜토리얼에 관심이 있는 친구들에게 도움이 되기를 바랍니다.

관련 라벨:
원천:php.cn
본 웹사이트의 성명
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.
인기 튜토리얼
더>
최신 다운로드
더>
웹 효과
웹사이트 소스 코드
웹사이트 자료
프론트엔드 템플릿