> 백엔드 개발 > PHP 튜토리얼 > curl抓取网页正则匹配问题

curl抓取网页正则匹配问题

WBOY
풀어 주다: 2016-06-06 20:16:12
원래의
1390명이 탐색했습니다.

在Mac上写的PHP代码,可正常执行,但是在Web Server上运行时会匹配不到。
Mac上PHP Version 5.5.30,CentOS 7服务器上的是PHP Version 5.6.15。
就是在执行到第二个if时会保未找到该用户!

<code>//正则匹配页面中的用户信息,如果匹配成功则进入编辑页面
          if ($c = preg_match_all ('/(\/UserManage\/edit\/id\/\d*)/is', $search_result, $matches))
          {
            $string1 = $matches[1][0];

            //获取页面中的用户信息,并进入编辑页面
             $match_user = curl_init();
            curl_setopt($match_user, CURLOPT_URL, 'https://crm.pzoom.com'.$string1);//登陆后要从哪个页面获取信息
            curl_setopt($match_user, CURLOPT_POST, 1);
            curl_setopt($match_user, CURLOPT_COOKIEFILE, PZOOM_COOKIE_FILE);// 获取COOKIE文件
            curl_setopt($match_user, CURLOPT_RETURNTRANSFER, 1);//设定返回 的数据是否自动显示
            curl_setopt($match_user, CURLOPT_HEADER, false);//设定是否显示头信 息
            curl_setopt($match_user, CURLOPT_NOBODY, false);//设定是否输出页面 内容
            $edit_info = curl_exec($match_user);//返回结果
            curl_close($match_user); //关闭

            if ($c2=preg_match_all ('/(.*?value=\"(.*)\")/is', $edit_info, $edit_info_match))
            {

                if ($c3=preg_match_all ('/(.*?value=\"(.*)\") (checked)/is', $edit_info, $match_radio))
                {
                    
                }else{
                    echo "匹配失败!";
                }

                if ($c4=preg_match_all ('/(textarea class="descript_cont" name="user\\[userDesc\\]" id="userDesc">)(.*)()/is', $edit_info, $match_textarea))

                {
                    
                }else{
                    echo "匹配失败!";
                }

                $call_id = $edit_info_match[2][0];
                $user_userid = $edit_info_match[2][1];
                $user_name = $edit_info_match[2][2];
                $user_sex = $match_radiop[1][0];
                $user_tel = $edit_info_match[2][5];
                $user_mobile = $edit_info_match[2][6];
                $user_mail = $edit_info_match[2][7];
                $user_departmentid = $edit_info_match[2][9];
                $user_address = $edit_info_match[2][10];
                $call_areacode = $edit_info_match[2][12];
                $call_pwd = $edit_info_match[2][14];
                $user_desc = $match_textarea[2][0];
                $user_id = substr($string1,-4);

                //发起新的请求,并提交用户输入的分机号
                $edit_userinfor = curl_init();
                curl_setopt($edit_userinfor, CURLOPT_URL, 'https://crm.pzoom.com/UserManage/edit');//登陆后要从哪个页面获取信息
                curl_setopt($edit_userinfor, CURLOPT_POST, 1);
                $submit_userinfo = "call[id2]=$call_id&user[userid]=$user_userid&user[username]=$user_name&user[sex]=$user_sex&user[usertel]=$user_tel&user[usermobile]=$user_mobile&user[useremail]=$user_mail&user[departmentid]=$user_departmentid&user[address]=$user_address&call[cno]=$tel_number&call[areaCode]=021&isManager=1&call[pwd]=Aa123456&user[userDesc]=$user_desc&user[num]=1&user[id]=$user_id";
                curl_setopt($edit_userinfor, CURLOPT_POSTFIELDS, $submit_userinfo);//传递数据
                curl_setopt($edit_userinfor, CURLOPT_COOKIEFILE, PZOOM_COOKIE_FILE);// 获取COOKIE文件
                curl_setopt($edit_userinfor, CURLOPT_RETURNTRANSFER, 1);//设定返回 的数据是否自动显示
                curl_setopt($edit_userinfor, CURLOPT_HEADER, false);//设定是否显示头信 息
                $submint_userinfo = curl_exec($edit_userinfor);//返回结果
                curl_close($edit_userinfor); //关闭

            }else{
                die("未找到该用户!");
            }
        }else{
            die("未找到该用户!");
        }
      }</code>
로그인 후 복사
로그인 후 복사

难道PHP版本不同,正则表达式也不一样?

回复内容:

在Mac上写的PHP代码,可正常执行,但是在Web Server上运行时会匹配不到。
Mac上PHP Version 5.5.30,CentOS 7服务器上的是PHP Version 5.6.15。
就是在执行到第二个if时会保未找到该用户!

<code>//正则匹配页面中的用户信息,如果匹配成功则进入编辑页面
          if ($c = preg_match_all ('/(\/UserManage\/edit\/id\/\d*)/is', $search_result, $matches))
          {
            $string1 = $matches[1][0];

            //获取页面中的用户信息,并进入编辑页面
             $match_user = curl_init();
            curl_setopt($match_user, CURLOPT_URL, 'https://crm.pzoom.com'.$string1);//登陆后要从哪个页面获取信息
            curl_setopt($match_user, CURLOPT_POST, 1);
            curl_setopt($match_user, CURLOPT_COOKIEFILE, PZOOM_COOKIE_FILE);// 获取COOKIE文件
            curl_setopt($match_user, CURLOPT_RETURNTRANSFER, 1);//设定返回 的数据是否自动显示
            curl_setopt($match_user, CURLOPT_HEADER, false);//设定是否显示头信 息
            curl_setopt($match_user, CURLOPT_NOBODY, false);//设定是否输出页面 内容
            $edit_info = curl_exec($match_user);//返回结果
            curl_close($match_user); //关闭

            if ($c2=preg_match_all ('/(.*?value=\"(.*)\")/is', $edit_info, $edit_info_match))
            {

                if ($c3=preg_match_all ('/(.*?value=\"(.*)\") (checked)/is', $edit_info, $match_radio))
                {
                    
                }else{
                    echo "匹配失败!";
                }

                if ($c4=preg_match_all ('/(textarea class="descript_cont" name="user\\[userDesc\\]" id="userDesc">)(.*)()/is', $edit_info, $match_textarea))

                {
                    
                }else{
                    echo "匹配失败!";
                }

                $call_id = $edit_info_match[2][0];
                $user_userid = $edit_info_match[2][1];
                $user_name = $edit_info_match[2][2];
                $user_sex = $match_radiop[1][0];
                $user_tel = $edit_info_match[2][5];
                $user_mobile = $edit_info_match[2][6];
                $user_mail = $edit_info_match[2][7];
                $user_departmentid = $edit_info_match[2][9];
                $user_address = $edit_info_match[2][10];
                $call_areacode = $edit_info_match[2][12];
                $call_pwd = $edit_info_match[2][14];
                $user_desc = $match_textarea[2][0];
                $user_id = substr($string1,-4);

                //发起新的请求,并提交用户输入的分机号
                $edit_userinfor = curl_init();
                curl_setopt($edit_userinfor, CURLOPT_URL, 'https://crm.pzoom.com/UserManage/edit');//登陆后要从哪个页面获取信息
                curl_setopt($edit_userinfor, CURLOPT_POST, 1);
                $submit_userinfo = "call[id2]=$call_id&user[userid]=$user_userid&user[username]=$user_name&user[sex]=$user_sex&user[usertel]=$user_tel&user[usermobile]=$user_mobile&user[useremail]=$user_mail&user[departmentid]=$user_departmentid&user[address]=$user_address&call[cno]=$tel_number&call[areaCode]=021&isManager=1&call[pwd]=Aa123456&user[userDesc]=$user_desc&user[num]=1&user[id]=$user_id";
                curl_setopt($edit_userinfor, CURLOPT_POSTFIELDS, $submit_userinfo);//传递数据
                curl_setopt($edit_userinfor, CURLOPT_COOKIEFILE, PZOOM_COOKIE_FILE);// 获取COOKIE文件
                curl_setopt($edit_userinfor, CURLOPT_RETURNTRANSFER, 1);//设定返回 的数据是否自动显示
                curl_setopt($edit_userinfor, CURLOPT_HEADER, false);//设定是否显示头信 息
                $submint_userinfo = curl_exec($edit_userinfor);//返回结果
                curl_close($edit_userinfor); //关闭

            }else{
                die("未找到该用户!");
            }
        }else{
            die("未找到该用户!");
        }
      }</code>
로그인 후 복사
로그인 후 복사

难道PHP版本不同,正则表达式也不一样?

好歹给个数据让测试一下啊,光看能看出什么来

因为抓取的是公司的OA系统,所以不方便透露全部代码,而且我在匹配后是要连接本地的数据库,进行数据匹配,填入相应的值!
下面是我要匹配的页面的表单:

<code><form action="" method="post" id="myform">
                        <input type="hidden" value="1010" name="call[id2]">
                        <ul class="clearfix">
                            <li><label><span><em>*</em>用户名:</span><input type="text" name="user[userid]" id="userid" class="ipt custom_ipt" onblur="checkName()" value="yangtao"></label></li>
                            <li><label><span><em>*</em>真实姓名:</span><input type="text" name="user[username]" id="username" class="ipt custom_ipt" value="杨涛"></label></li>
                            <li><label><span>性别:</span>
                                <input type="radio" name="user[sex]" id="sex1" value="男">男 
                                <input type="radio" name="user[sex]" id="sex2" value="女">女
                            </label></li>
                            <li><label><span>固定电话:</span><input type="text" name="user[usertel]" id="usertel" class="ipt" value=""></label></li>
                            <li><label><span>移动电话:</span><input type="text" name="user[usermobile]" id="usermobile" value="15800470152" class="ipt custom_ipt"></label></li>
                            <li><label><span><em>*</em>电子邮件:</span><input type="text" name="user[useremail]" id="useremail" value="yangtao@qiso360.com" class="ipt custom_ipt"></label></li>
                            <li style="position:relative;"><label><span><em>*</em>部门:</span>
                                <input type="text" class="ipt one_btn_client" id="departmentid" value="上分销售部">
                                <input type="hidden" class="one_btn_search" name="user[departmentid]" value="89">
                                <ul class="client_list">
                                </ul>
                            </label></li> 
                        <li><label><span>详细地址:</span><input type="text" name="user[address]" value="" class="ipt custom_ipt"></label></li>
                        <li>
<label><span>坐席工号:</span><input type="text" value="8451" name="call[cno]" class="ipt custom_ipt"></label>(工号取值范围2000-9999)</li>
                        <li>
<label><span>坐席区号:</span><input type="text" value="021" name="call[areaCode]" class="ipt custom_ipt"></label>(座席所在地区区号)</li>
                        <li><label><span>是否管理人员:</span><input type="radio" name="isManager" value="1" checked>否 <input type="radio" name="isManager" value="2">是</label></li>
                        <li>
<label><span>坐席密码:</span><input type="text" value="Aa123456" name="call[pwd]" class="ipt custom_ipt"></label>(不填为默认密码123456)</li>
                        <li><label><span>描述信息:</span><textarea class="descript_cont" name="user[userDesc]" id="userDesc"></textarea></label></li>
                        <li>
<label><span>状态:</span><input type="radio" name="user[num]" value="1" checked>正常  <input type="radio" name="user[num]" value="2">冻结  <input type="radio" name="user[num]" value="3">离职</label>
                        </li>
                        </ul>
                        <div class="add_custom_add">
                            <input type="hidden" name="user[id]" id="uid" value="3408">
                            <input type="button" value="保存" class="btn_add" id="bbs">
                            <a href="/UserManage/userinfo" class="btn_add">返回</a>
                        </div>
                    </form></code>
로그인 후 복사
원천:php.cn
본 웹사이트의 성명
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.
최신 이슈
인기 튜토리얼
더>
최신 다운로드
더>
웹 효과
웹사이트 소스 코드
웹사이트 자료
프론트엔드 템플릿