以正方教务系统为例,用php模拟登陆抓取课表、空教室
以正方教务系统为例,用php模拟登陆抓取课表、空教室
课程格子和超级课程表这两个应用,想必大学生都很熟悉,使用自己的学号和教务系统的密码,就可以将自己的课表导入,随时随地都可以在手机上查看。
其实稍微了解一点php的话,我们也可以做一个类似这样的web 应用。
1,解决掉验证码
其实这是正方的一个小bug,当我们进入登陆界面时,浏览器会去请求服务器,服务器会生成一个验证码图片。如果我们不去请求这个图片,那么正方后台也不会生成相应的验证码,于是这样我们就有了可乘之机,让我高兴会儿~这时,我们在不填写验证码的情况下,可以很流畅地进入。大家可以在自己的电脑上禁止访问验证码的地址,然后试试这是不是真的~当然,这只对正方有效。
2,php 的curl 模拟登陆
这里直接贴一个脚本之家对 curl 的讲解吧 http://www.jb51.net/article/51299.htm
接下来就是相关代码了,相信很多人和我一样,只喜欢看例子,对于长篇大论的讲解,转头就走……不过这个习惯还是不好……废话不多说!
/**
 * Send an HTTP request through cURL while imitating a browser (simulated login helper).
 *
 * @param string       $url          Address to request.
 * @param array|string $post         Form fields to POST; when empty a GET is sent. An array
 *                                   is URL-encoded, a string is sent as given.
 * @param string       $cookie       Cookie header value to send; when empty none is sent.
 * @param int          $returnCookie Truthy to capture the response headers and return
 *                                   array('cookie' => ..., 'content' => ...).
 * @return array|string The response body, or the array described above when $returnCookie
 *                      is truthy. If the transfer fails, the cURL error message string.
 */
function curl_request($url, $post = '', $cookie = '', $returnCookie = 0)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    // Placeholder: replace this string with the login URL of the academic system.
    curl_setopt($curl, CURLOPT_REFERER, "这里一定要换成教务系统登陆的url");
    if ($post) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, is_string($post) ? $post : http_build_query($post));
    }
    if ($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }
    curl_setopt($curl, CURLOPT_HEADER, (bool) $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data = curl_exec($curl);
    if (curl_errno($curl)) {
        // Read the message before closing so the handle is released on the error path too.
        $error = curl_error($curl);
        curl_close($curl);
        return $error;
    }
    // Redirects are followed, so the output may start with several header blocks;
    // the reported header size covers all of them, which a split on the first
    // blank line would not.
    $headerSize = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    if (!$returnCookie) {
        return $data;
    }

    $header = (string) substr($data, 0, $headerSize);
    $body = (string) substr($data, $headerSize);

    // Take the name=value pair of the first Set-Cookie header. Header names are
    // case-insensitive and a cookie without attributes has no trailing ';'.
    $cookieValue = '';
    if (preg_match('/^Set-Cookie:\s*([^;\r\n]*)/mi', $header, $matches)) {
        $cookieValue = trim($matches[1]);
    }

    $info = array();
    $info['cookie'] = $cookieValue;
    $info['content'] = $body;
    return $info;
}
3,教务系统登陆页面的隐藏字段
举个栗子
<input type="hidden" name="__VIEWSTATE" value="dDwyODE2NTM0OTg7Oz61eIbnKVojBioGYtg2vsy2SklwiA==">
这些东西在登陆的时候也是需要带上的,顺便贴出函数,顺便暴露了博主的学校……皇家种地大学(主要是正则表达式的运用)
/**
 * Fetch the login page and extract the value of its hidden __VIEWSTATE field.
 *
 * @return string The __VIEWSTATE value, or an empty string when the page could
 *                not be fetched as text or the field is not present.
 */
function getView()
{
    $url = 'http://jw.hzau.edu.cn/default2.aspx';
    $result = curl_request($url);

    // Accept both `value="..." />` and `value="...">` endings of the input tag.
    $pattern = '/<input type="hidden" name="__VIEWSTATE" value="(.*?)"\s*\/?>/is';
    if (!is_string($result) || !preg_match($pattern, $result, $matches)) {
        return '';
    }

    return $matches[1];
}
/**
 * Fetch the classroom-query page and return its hidden __VIEWSTATE value.
 *
 * @param string $cookie Session cookie to send with the request.
 * @param string $xh     Student number, placed in the page URL.
 * @return string The hidden field value, or an empty string when the page could
 *                not be fetched as text or the field is not present.
 */
private function getViewJs($cookie,$xh){
    $url = "http://jw.hzau.edu.cn/xxjsjy.aspx?xh={$xh}";
    $result = curl_request($url,'',$cookie);

    // The pattern was published as '//is' (its HTML content was lost), which has no
    // capture group. It is rebuilt here as a hidden __VIEWSTATE input matcher.
    // NOTE(review): confirm against the real page markup.
    $pattern = '/<input type="hidden" name="__VIEWSTATE" value="(.*?)"\s*\/?>/is';
    if (!is_string($result) || !preg_match($pattern, $result, $matches)) {
        return '';
    }

    return $matches[1];
}
4,cookie 的获取
/**
 * Submit the login form and return the session cookie issued by the server.
 *
 * @param string $xh  Student number.
 * @param string $pwd Password for the academic system.
 * @return string The cookie (name=value) from the login response, or an empty
 *                string when the request failed or no cookie was returned.
 */
function login($xh, $pwd)
{
    $url = 'http://jw.hzau.edu.cn/default2.aspx';

    $post = array(
        '__VIEWSTATE'      => $this->getView(),
        'txtUserName'      => $xh,   // student number
        'TextBox2'         => $pwd,  // password
        'txtSecretCode'    => '',    // captcha field, submitted empty
        'lbLanguage'       => '',
        'hidPdrs'          => '',
        'hidsc'            => '',
        // The form values are converted from UTF-8 to GB2312 before posting.
        'RadioButtonList1' => iconv('utf-8', 'gb2312', '学生'),
        'Button1'          => iconv('utf-8', 'gb2312', '登录'),
    );

    $result = curl_request($url, $post, '', 1);

    // curl_request() hands back an error string when the transfer fails;
    // indexing that string would not yield a cookie.
    if (!is_array($result) || !isset($result['cookie'])) {
        return '';
    }

    return $result['cookie'];
}
5,让我们来试试查课表的功能,格式有点乱额,大家凑合着看,我把课表转成了一个二维关联数组
/**
 * Log in, download the timetable page and return the timetable as a
 * two-dimensional array keyed by weekday and class period.
 *
 * @param string $xh  Student number.
 * @param string $pwd Password for the academic system.
 * @return array|int The grid built by convertToTable(), or 0 when the login
 *                   check fails.
 */
private function classresult($xh, $pwd)
{
    date_default_timezone_set("PRC"); // time zone setting

    $cookie = $this->login($xh, $pwd);

    // The hidden value of the classroom page is used as the "password correct" check:
    // an empty value is treated as a failed login.
    $view = $this->getViewJs($cookie, $xh);
    if (empty($view)) {
        return 0;
    }

    $url = "http://jw.hzau.edu.cn/xskbcx.aspx?xh={$xh}";
    $result = curl_request($url, '', $cookie);

    // Isolate the whole timetable; a missing table simply yields an empty grid.
    $table = '';
    if (preg_match('/<table id="Table1"[\w\W]*?>([\w\W]*?)<\/table>/', $result, $tableMatch)) {
        $table = $tableMatch[0];
    }

    preg_match_all('/<td [\w\W]*?>([\w\W]*?)<\/td>/', $table, $cellMatch);

    // Keep only cells that contain a {...} segment (the course cells) and
    // convert them from GB2312 to UTF-8.
    $courses = array();
    foreach ($cellMatch[1] as $cell) {
        $cell = str_replace("<br>", "", $cell);
        if (preg_match("/{(.*)}/", $cell)) {
            $courses[] = iconv('GB2312', 'UTF-8', $cell);
        }
    }

    return $this->convertToTable($courses);
}

/**
 * Place each course description into a weekday x period grid.
 *
 * Implemented as a method rather than a function declared inside classresult(),
 * because a nested named function is redeclared (fatal error) on a second call.
 *
 * @param array $table List of UTF-8 course description strings.
 * @return array Map of weekday key ('sun'..'sat') to a map of period
 *               ('1,2'..'9,10') to course text; unused slots hold ''.
 */
private function convertToTable($table)
{
    $week = array("sun"=>"周日","mon"=>"周一","tues"=>"周二","wed"=>"周三","thur"=>"周四","fri"=>"周五","sat"=>"周六");
    $order = array('1,2','3,4','5,6','7,8','9,10');

    $list = array();
    foreach (array_keys($week) as $dayKey) {
        $list[$dayKey] = array_fill_keys($order, '');
    }

    foreach ($table as $class) {
        if (!is_string($class)) {
            continue; // iconv() failed for this cell
        }

        // Reset per course so an unmatched cell cannot overwrite the previous slot.
        $matchedDay = null;
        $matchedPeriod = null;

        foreach ($week as $dayKey => $dayLabel) {
            // strpos() returns 0 for a match at the start, so compare against false.
            if (strpos($class, $dayLabel) !== false) {
                $matchedDay = $dayKey;
                break;
            }
        }

        if ($matchedDay !== null) {
            foreach ($order as $period) {
                if (strpos($class, $period) !== false) {
                    $matchedPeriod = $period; // which periods the course occupies
                    break;
                }
            }
        }

        if ($matchedDay !== null && $matchedPeriod !== null) {
            $list[$matchedDay][$matchedPeriod] = $class;
        }
    }

    return $list;
}
6,再试试查询空教室的功能
/**
 * Query the free-classroom page and render the result with the "classroom" view.
 *
 * Reads the query parameters `start` and `class` from $_GET and forwards them
 * as the `kssj` and `sjd` form fields.
 *
 * @return void
 */
public function roomresult()
{
    $xh = "";  // student number to log in with
    $pwd = ""; // password for that student number
    $cookie = $this->login($xh, $pwd);

    // The main page is requested first and its response is not used.
    // NOTE(review): presumably needed to establish the session — confirm.
    curl_request("http://jw.hzau.edu.cn/xs_main.aspx?xh={$xh}", '', $cookie);

    // Missing or non-scalar parameters become null, which http_build_query()
    // leaves out of the form body, instead of raising an undefined-index notice.
    $start = (isset($_GET['start']) && is_scalar($_GET['start'])) ? (string) $_GET['start'] : null;
    $period = (isset($_GET['class']) && is_scalar($_GET['class'])) ? (string) $_GET['class'] : null;

    $url = "http://jw.hzau.edu.cn/xxjsjy.aspx?xh={$xh}";
    $post = array(
        'Button2'                 => iconv('utf-8', 'gb2312', '空教室查询'),
        '__EVENTARGUMENT'         => '',
        '__EVENTTARGET'           => '',
        '__VIEWSTATE'             => $this->getViewJs($cookie, $xh),
        'ddlDsz'                  => iconv('utf-8', 'gb2312', '单'),
        'ddlSyXn'                 => '2014-2015', // academic year
        'ddlSyxq'                 => '1',
        'jslb'                    => '',
        'xiaoq'                   => '',
        'kssj'                    => $start,      // submitted start time of the query
        'sjd'                     => $period,     // submitted class period
        'xn'                      => '2014-2015', // current academic year
        'xq'                      => '2',         // current term
        'xqj'                     => '6',         // day of the week
        'dpDataGrid1:txtPageSize' => 90,          // rows per page
    );
    $result = curl_request($url, $post, $cookie, 0);

    // The fourth <span> on the page is taken as the tip text shown above the list.
    preg_match_all('/<span[^>]+>[^>]+span>/', $result, $out);
    $tip = isset($out[0][3]) ? iconv('gb2312', 'utf-8', $out[0][3]) : '';

    // The first <table> on the page is taken as the query result list.
    preg_match_all('/<table[\w\W]*?>([\w\W]*?)<\/table>/', $result, $out);
    $table = isset($out[0][0]) ? iconv('gb2312', 'utf-8', $out[0][0]) : '';

    $this->load->view("classroom", array('tip' => $tip, 'table' => $table));
}
总结起来就是这些了,每个学校的教务系统都不尽相同,这时我们可以借助火狐浏览器的 firebug 抓包,看看到底提交了哪些东西。如果不成功,要看看自己该提交的东西post 上去了没有,如果再不成功,额……可以联系我 [email protected]
就这些了,赶快去试试吧!
- 1楼——石头——
- 写得不错~赞一个

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article

Hot Tools

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

Hot Topics

Many users will choose the Huawei brand when choosing smart watches. Among them, Huawei GT3pro and GT4 are very popular choices. Many users are curious about the difference between Huawei GT3pro and GT4. Let’s introduce the two to you. . What are the differences between Huawei GT3pro and GT4? 1. Appearance GT4: 46mm and 41mm, the material is glass mirror + stainless steel body + high-resolution fiber back shell. GT3pro: 46.6mm and 42.9mm, the material is sapphire glass + titanium body/ceramic body + ceramic back shell 2. Healthy GT4: Using the latest Huawei Truseen5.5+ algorithm, the results will be more accurate. GT3pro: Added ECG electrocardiogram and blood vessel and safety

To update the curl version under Linux, you can follow the steps below: Check the current curl version: First, you need to determine the curl version installed in the current system. Open a terminal and execute the following command: curl --version This command will display the current curl version information. Confirm available curl version: Before updating curl, you need to confirm the latest version available. You can visit curl's official website (curl.haxx.se) or related software sources to find the latest version of curl. Download the curl source code: Using curl or a browser, download the source code file for the curl version of your choice (usually .tar.gz or .tar.bz2

Cookies are usually stored in the cookie folder of the browser. Cookie files in the browser are usually stored in binary or SQLite format. If you open the cookie file directly, you may see some garbled or unreadable content, so it is best to use Use the cookie management interface provided by your browser to view and manage cookies.

Cookies on your computer are stored in specific locations on your browser, depending on the browser and operating system used: 1. Google Chrome, stored in C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data\Default \Cookies etc.

Cookies on the mobile phone are stored in the browser application of the mobile device: 1. On iOS devices, Cookies are stored in Settings -> Safari -> Advanced -> Website Data of the Safari browser; 2. On Android devices, Cookies Stored in Settings -> Site settings -> Cookies of Chrome browser, etc.

How to handle 301 redirection of web pages in PHPCurl? When using PHPCurl to send network requests, you will often encounter a 301 status code returned by the web page, indicating that the page has been permanently redirected. In order to handle this situation correctly, we need to add some specific options and processing logic to the Curl request. The following will introduce in detail how to handle 301 redirection of web pages in PHPCurl, and provide specific code examples. 301 redirect processing principle 301 redirect means that the server returns a 30

With the popularity of the Internet, we use browsers to surf the Internet have become a way of life. In the daily use of browsers, we often encounter situations where we need to enter account passwords, such as online shopping, social networking, emails, etc. This information needs to be recorded by the browser so that it does not need to be entered again the next time you visit. This is when cookies come in handy. What are cookies? Cookie refers to a small data file sent by the server to the user's browser and stored locally. It contains user behavior of some websites.

The dangers of cookie leakage include theft of personal identity information, tracking of personal online behavior, and account theft. Detailed introduction: 1. Personal identity information is stolen, such as name, email address, phone number, etc. This information may be used by criminals to carry out identity theft, fraud and other illegal activities; 2. Personal online behavior is tracked and analyzed through cookies With the data in the account, criminals can learn about the user's browsing history, shopping preferences, hobbies, etc.; 3. The account is stolen, bypassing login verification, directly accessing the user's account, etc.
