Home > php教程 > php手册 > body text

脚本之家:首页 | 网页制作 | 脚本专栏 | 网络编程 | 数据库 | 脚本下载 | CMS教程 | 电子书籍 | 平面设计 | 媒…

WBOY
Release: 2016-06-13 09:44:04
Original
1312 people have browsed it

复制代码 代码如下:


/**
 * HOST: www.icbase.com
 */
//set_time_limit(0);
// base function
/**
 * Perform an HTTP GET request with cURL and return the raw response.
 *
 * @param string       $url     Target URL; $data is appended to it as a query string.
 * @param array|string $data    Query parameters (array is passed through http_build_query()) or a ready-made query string.
 * @param array        $header  Extra HTTP header lines, handed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port, always applied through CURLOPT_PORT.
 * @param string       $reffer  Optional Referer header value.
 * @param string       $proxy   Optional proxy host; when set, the proxy port and credentials below are used.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only when curl_errno() is non-zero.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();
    if (!empty($data)) {
        $data = is_array($data) ? http_build_query($data) : $data;
        // strpos() returns 0 (falsy) when '?' is the first character, so the
        // result must be compared against false rather than tested for truthiness.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $data;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    // NOTE(review): the port is forced even for URLs that carry their own
    // scheme/port; callers targeting anything but port 80 must pass $port.
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects and fetch the final page
    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }
    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        // NOTE(review): proxy port and credentials are hard-coded in source;
        // they should be moved to configuration.
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

复制代码 代码如下:


/**
 * Send an HTTP POST request with cURL and hand back the raw response.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Request body, passed unchanged to CURLOPT_POSTFIELDS.
 * @param array        $header  Extra HTTP header lines; only applied when non-empty.
 * @param int          $timeout Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Accepted for signature symmetry with curl_get(); not applied to the handle.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only when curl_errno() is non-zero.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $handle = curl_init();

    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $timeout);
    if (!empty($header)) {
        curl_setopt($handle, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($handle, CURLOPT_POST, 1);
    curl_setopt($handle, CURLOPT_POSTFIELDS, $data);

    $response = array('result' => curl_exec($handle));
    if (curl_errno($handle) != 0) {
        $response['error'] = "Error:\n" . curl_error($handle);
    }

    curl_close($handle);

    return $response;
}

/**
 * Fetch the HTML source of a search-result list page.
 *
 * Page 1 is requested with curl_get(); the response is then scanned for three
 * values that are stored in the constants __VIEWSTATE, __PREVIOUSPAGE and
 * __EVENTVALIDATION. Any other page is requested with curl_post(), sending
 * those constants together with the page number as form fields.
 *
 * NOTE(review): the guard condition at the top of the body and the three
 * preg_match() patterns are truncated in this copy of the file (the text looks
 * like it lost everything from a '<' onwards). The function does not parse as
 * shown; the original condition and patterns must be restored.
 *
 * NOTE(review): each "defined / preg_match" check returns false from its else
 * branch, which presumably also triggers when the constant is already defined
 * by an earlier page-1 call - confirm once the conditions are restored.
 *
 * @param string $keywords search keywords
 * @param int $page page number
 * @return boolean|string false on failure, otherwise the 'result' entry of the curl helper's return value
 */
function getListHtml($keywords, $page=1)
{
 // NOTE(review): condition truncated in this copy; the comparison operand is missing.
 if ($page  {
 return false;
 }
 $page = $page == 0 ? 1 : intval($page);
 if ($page == 1)
 {
 $result = curl_get('http://www.icbase.com/ProResult.aspx', array('ProKey' => $keywords));
 if ( isset($result['error']) )
 {
 return false;
 //exit($result['error']);
 }
 $result = $result['result'];

 // Data needed for the ASP.NET POST submission.
 // NOTE(review): the regex literals in the three checks below are cut off
 // right after the opening delimiter; the closing quote, the subject and the
 // $matches argument are all missing.
 if(! defined('__VIEWSTATE') && preg_match('/ {
 define('__VIEWSTATE', $matches[1]);
 } else {
 return false;
 }

 if(! defined('__PREVIOUSPAGE') && preg_match('/ {
 define('__PREVIOUSPAGE', $matches[1]);
 } else {
 return false;
 }

 if(! defined('__EVENTVALIDATION') && preg_match('/ {
 define('__EVENTVALIDATION', $matches[1]);
 } else {
 return false;
 }

 return $result;
 }
 $data = array(
 '__EVENTTARGET' => 'pager',
 '__EVENTARGUMENT' => $page,
 '__VIEWSTATE' => __VIEWSTATE,
 '__PREVIOUSPAGE' => __PREVIOUSPAGE,
 '__EVENTVALIDATION' => __EVENTVALIDATION,
 );
 $result = curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . $keywords, $data);
 if ( isset($result['error']) )
 {
 return false;
 //exit($result['error']);
 }
 $result = $result['result'];
 return $result;
}

/**
 * Extract the link URLs from a list page.
 *
 * Runs preg_match_all() over the given HTML and returns the first capture
 * group of every match, or an empty array when nothing matches.
 *
 * NOTE(review): the pattern below is corrupted in this copy of the file - it
 * contains the site's navigation text instead of the original markup and has
 * no visible capture group. It also contains an unescaped '/' before the
 * intended end of the pattern, so presumably PCRE rejects it; the original
 * regex must be restored.
 *
 * @param string $html HTML source
 * @return array
 */
function getListHref($html)
{
 $pattern = '/[\s\n]*分享蟧.gif" alt="脚本之家" />首页网页制作脚本专栏网络编程数据库脚本下载CMS教程电子书籍平面设计媒]\/>/isU';
 if (preg_match_all($pattern, $html, $matches))
 {
 return $matches[1];
 } else {
 // no matches
 return array();
 }
}

/**
 * Extract the number of the next page from a list page.
 *
 * Returns the first capture group of the match converted with intval(), or -1
 * when the pattern does not match.
 *
 * NOTE(review): the pattern below has lost most of its content in this copy of
 * the file (it spans three lines and contains no capture group), yet
 * $matches[1] is read on a match. The original regex must be restored.
 *
 * @param string $html HTML source
 * @return number
 */
function getListNextPage($html)
{
 $pattern = '/

]>.+>/isU';
 if (preg_match($pattern, $html, $matches))
 {
 return intval($matches[1]);
 } else {
 return -1;
 }
}

/**
 * Collect the detail-page hrefs from every list page for a search.
 *
 * Fetches page 1, then keeps following the page number reported by
 * getListNextPage() until it reports no further page, a fetch fails, or a
 * page number repeats.
 *
 * @param string $keywords search keywords
 * @return boolean|array false when $keywords is empty; otherwise the list of
 *                       hrefs (empty array when the first page yields nothing)
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        // First page could not be fetched: nothing to extract.
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // no results
        return array();
    }

    // Remember which pages were already requested so that a page that keeps
    // pointing at itself (or at an earlier page) cannot loop forever.
    $visited = array(1 => true);
    $nextPage = getListNextPage($html);
    while ($nextPage > 0 && !isset($visited[$nextPage])) {
        $visited[$nextPage] = true;

        $html = getListHtml($keywords, $nextPage);
        if ($html === false) {
            // Fetch failed: keep what has been collected so far.
            break;
        }

        $hrefList = array_merge($hrefList, getListHref($html));
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}

/**
 * Extract product information from a detail page.
 *
 * When $is_url is truthy, $url is treated as a path on www.icbase.com and the
 * page is fetched with curl_get(); otherwise $url is treated as HTML source
 * and parsed directly. A series of regular expressions then fills the fields
 * of the result array.
 *
 * NOTE(review): the regex literals for mfg_part, pdf_url and img_url are
 * truncated in this copy of the file and have swallowed the "if" that follows
 * them, so the function does not parse as shown; the original patterns must be
 * restored.
 * NOTE(review): a fetch error terminates the whole script via exit() instead
 * of returning a failure value.
 * NOTE(review): the host is passed to curl_get() without a scheme prefix.
 * NOTE(review): 'para' and 'price' are initialised as '' but later written to
 * with array keys; on PHP >= 7.1 an empty string is presumably no longer
 * auto-converted to an array - confirm and initialise them as array().
 *
 * @param string $url URL path, or already-fetched HTML source, depending on $is_url
 * @param int $is_url 1 = $url is a URL path to fetch, 0 = $url is HTML source to parse directly
 * @return boolean|array false when $url is empty, an empty array when the
 *                       mfg_part pattern does not match, otherwise the filled result array
 */
function getDetail($url, $is_url = 1)
{
 if ( empty($url) )
 {
 return false;
 }
 $host = 'www.icbase.com';
 $html = $url;
 if ($is_url) {
 $url = '/' . ltrim($url, '/');
 $result = curl_get($host . $url);
 if ( isset($result['error']) )
 {
 exit($result['error']);
 }
 $html = $result['result'];
 }

 $result = array(
 'sup_part' => '', // supplier part number
 'sup_id' => '', // supplier ID
 'mfg_part' => '', // manufacturer part number
 'mfg_name' => '', // manufacturer name
 'cat_name' => '', // category name
 'para' => '', // attributes
 'desc' => '', // description
 'pdf_url' => '', // PDF URL
 'sup_stock' => '', // stock quantity
 'min_purch' => '', // minimum order quantity
 'price' => '', // price
 'img_url' => '', // image URL
 'createtime' => '', // creation time
 'datacode' => '', // batch / date code
 'package' => '', // package
 'page_url' => '', // page URL
 );

// mfg_part
// NOTE(review): the pattern below is truncated and unterminated in this copy.
 $pattern = '/
产品型号 (.[^ if (preg_match($pattern, $html, $matches))
 {
 $result['mfg_part'] = trim($matches[1]);
 } else {
 // 此项木有,说明也没处处了
 return array();
 }

 // mfg_name
 $pattern = '/ 厂商[\s\n]* (.+)/isU';
 if (preg_match($pattern, $html, $matches))
 {
 $result['mfg_name'] = trim($matches[1]);
 }

 // para
 $pattern = '/ (.+)/isU';
 if (preg_match($pattern, $html, $matches))
 {
 if (preg_match_all('/(.+)/isU', $matches[1], $matches))
 {
 $count = count($matches[1]);
 $count = intval($count / 2 );
 foreach ($matches[1] as $k=>$v)
 {
 if ($k >= $count)
 {
 break;
 }
 if (trim($v) == '描述')
 {
 // desc
 $result['desc'] = trim($matches[1][$count + $k]);
 continue;
 }
 $v = trim($v);
 $result['para'][$v] = trim($matches[1][$count + $k]);
 }
 }
 }

 // pdf_url
 $pattern = '/ 详细资料  if (preg_match($pattern, $html, $matches))
 {
 $result['pdf_url'] = trim($matches[1]);
 }

 // sup_stock
 $pattern = '/
库存数量[\s\n]* (\d+)/isU';
 if (preg_match($pattern, $html, $matches))
 {
 $result['sup_stock'] = trim($matches[1]);
 }

 // price
 $pattern = '/ ]+>(\d+)\+]+>.[^\d]*([\d.]+)/isU';
 if (preg_match_all($pattern, $html, $matches))
 {
 foreach ($matches[1] as $k=>$v)
 {
 $result['price'][$v] = '¥' . $matches[2][$k];
 }
 }

 //img_url
 $pattern = '/图片 分享蟧.gif" alt="脚本之家" />首页网页制作脚本专栏网络编程数据库脚本下载CMS教程电子书籍平面设计媒 if (preg_match($pattern, $html, $matches))
 {
 $result['img_url'] = trim($matches[1]);
 }

 // page_url
 if ($is_url)
 {
 $result['page_url'] = $host . $url;
 }

return $result;
}

/**
 * Entry point: look up every detail page for a search and parse each one.
 *
 * @param string $keywords search keywords
 * @return array one getDetail() result per href found; empty when the href
 *               lookup yields no list (e.g. empty keywords)
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);

    // getListHrefAll() returns false for empty keywords; iterating over a
    // non-array would raise a warning, so bail out with an empty result.
    if (!is_array($hrefList)) {
        return array();
    }

    $result = array();
    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}

// Test script: read the search keywords from the query string and dump the
// scraped data. A missing or non-string "keywords" parameter is treated as an
// empty search instead of being passed to trim() unchecked.
$keywords = (isset($_GET['keywords']) && is_string($_GET['keywords']))
    ? trim($_GET['keywords'])
    : '';
$result = getData($keywords);

print_r($result);

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Recommendations
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template