复制代码 代码如下:
/**
 * HOST: www.icbase.com
 */
//set_time_limit(0);

// base function

/**
 * Perform an HTTP GET request with cURL.
 *
 * @param string       $url     Target URL; $data is appended to it as a query string.
 * @param array|string $data    Query parameters. An array is encoded with http_build_query(),
 *                              a string is appended as-is.
 * @param array        $header  Extra HTTP header lines; only applied when non-empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port, always passed to CURLOPT_PORT.
 * @param string       $reffer  Referer header value; skipped when empty.
 * @param string       $proxy   Proxy host; when non-empty the request goes through it.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only
 *               when cURL reported a non-zero error number.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $query = is_array($data) ? http_build_query($data) : $data;
        // strpos() returns 0 for a match at the first character, so compare against
        // false explicitly instead of relying on truthiness.
        $separator = (strpos($url, '?') !== false) ? '&' : '?';
        $url .= $separator . $query;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    // NOTE(review): the port is forced even when $url carries its own scheme or port;
    // callers requesting anything other than port 80 must pass $port explicitly.
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    // Follow redirects and return the page that is finally reached.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        // NOTE(review): proxy port and credentials are hard-coded here; they should be
        // moved to configuration rather than kept in source.
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (curl_errno($ch) !== 0) {
        // The message is prefixed with "Error:" followed by a real newline.
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
复制代码 代码如下:
/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST body, handed to CURLOPT_POSTFIELDS unchanged.
 * @param array        $header  Extra HTTP header lines; only applied when non-empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Accepted for signature compatibility but not applied:
 *                              the port is taken from $url.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only
 *               when cURL reported a non-zero error number.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (curl_errno($ch) !== 0) {
        // The message is prefixed with "Error:" followed by a real newline.
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
/**
* Fetch the HTML source of one page of search results.
*
* Page 1 is requested with a GET to ProResult.aspx. On that request the three
* ASP.NET form values (__VIEWSTATE, __PREVIOUSPAGE, __EVENTVALIDATION) are stored
* as constants, presumably captured from the response body. Any later page is
* requested with a POST that sends those constants together with the 'pager'
* event target and the page number.
*
* @param string $keywords Search keywords
* @param int $page page number; 0 is treated as 1, a negative value returns false
* @return string|false HTML source of the page, or false when the request or the
* form-value extraction fails
*/
function getListHtml($keywords, $page=1)
{
if ($page < 0)
{
return false;
}
$page = $page == 0 ? 1 : intval($page);
if ($page == 1)
{
$result = curl_get('http://www.icbase.com/ProResult.aspx', array('ProKey' => $keywords));
if ( isset($result['error']) )
{
return false;
//exit($result['error']);
}
$result = $result['result'];
// ASP.NET postback form data
// NOTE(review): the regex patterns in the preg_match() calls from here on are cut
// off (the pattern text after the opening delimiter is missing), so the code below
// does not parse as shown; the original patterns must be restored before it can run.
// NOTE(review): each check below returns false when its constant is already defined,
// so a second page-1 request in the same process would fail - confirm this is intended.
if(! defined('__VIEWSTATE') && preg_match('/
{ define('__VIEWSTATE', $matches[1]); } else { return false; } if(! defined('__PREVIOUSPAGE') && preg_match('/ { define('__PREVIOUSPAGE', $matches[1]); } else { return false; } if(! defined('__EVENTVALIDATION') && preg_match('/ { define('__EVENTVALIDATION', $matches[1]); } else { return false; } return $result; } $data = array( '__EVENTTARGET' => 'pager', '__EVENTARGUMENT' => $page, '__VIEWSTATE' => __VIEWSTATE, '__PREVIOUSPAGE' => __PREVIOUSPAGE, '__EVENTVALIDATION' => __EVENTVALIDATION, ); $result = curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . $keywords, $data); if ( isset($result['error']) ) { return false; //exit($result['error']); } $result = $result['result']; return $result; } /** * Get the url of the list page a link * @param string $html html source code * @return array */ function getListHref($html) { $pattern = '/[sn]*]/>/isU'; if (preg_match_all($pattern, $html, $matches)) { return $matches[1]; } else { // no match return array(); } } /** * Get the next page number * @param string $html html source code * @return number */ function getListNextPage($html) { $pattern = '/]>.+>/isU'; if (preg_match($pattern, $html, $matches)) { return intval($matches[1]); } else { return -1; } } /** * Get all hrefs in the list * @param string $keywords Search keywords * @return boolean|array */ function getListHrefAll($keywords) { if (empty($keywords)) { return false; } $html = getListHtml($keywords); $hrefList = getListHref($html); if (empty($hrefList)) { // no results return array(); } $nextPage = getListNextPage($html); while ($nextPage > 0) { $html = getListHtml($keywords, $nextPage); $tmpHrefList = getListHref($html); $hrefList = array_merge($hrefList, $tmpHrefList); $nextPage = getListNextPage($html); } return $hrefList; } /** * Get details page information * @param string $url The url address or the captured html source code is distinguished according to @see $is_url * @param int $is_url 1 uses url address 0 Directly process html source code * @return 
boolean|multitype:|multitype:string */ function getDetail($url, $is_url = 1) { if ( empty($url) ) { return false; } $host = 'www.icbase.com'; $html = $url; if ($is_url) { $url = '/' .ltrim ($url, '/'); $result = curl_get($host . $url); if ( isset($result['error']) ) { exit($ result['error']); } $html = $result['result']; } $result = array( 'sup_part' => '', // Supplier model 'sup_id' => '', // Supplier ID 'mfg_part' => '', // Manufacturer model 'mfg_name' => ; '', //Manufacturer name 'cat_name' => '', // Category name 'para' => '', // Attribute 'desc' => ' ', // Description 'pdf_url' => '', // PDF address 'sup_stock' => '', // Stock 'min_purch' => '', // Minimum order quantity 'price' => '', // Price 'img_url' => '', // Image address 'createtime' => '', // Creation time 'datacode' => '', // Batch number 'package' => '', // Package 'page_url' => '', // Page address ) ; // mfg_part $pattern = '/Product Model (.[^<]+) if (preg_match($pattern, $html, $matches)) { $result['mfg_part'] = trim($matches[1]); } else { // This item does not exist, and the instructions are nowhere to be found return array(); } // mfg_name $pattern = '/ Manufacturer [ sn]*(.+) /isU'; if (preg_match($pattern, $html, $matches)) { $result['mfg_name' ] = trim($matches[1]); } // para $pattern = '/(.+)/isU'; if (preg_match($pattern, $html, $matches)) { if (preg_match_all('/(.+) /isU', $matches[1], $matches)) { $count = count($matches[1 ]); $count = intval($count / 2 ); foreach ($matches[1] as $k=>$v) { if ($k >= $count) { break; } if (trim($v) == 'Description') { // desc $result['desc '] = trim($matches[1][$count + $k]); continue; } $v = trim($v); $result['para'] [$v] = trim($matches[1][$count + $k]); } } } // pdf_url $pattern = '/ Details if (preg_match($pattern, $html, $matches )) { $result['pdf_url'] = trim($matches[1]); } // sup_stock $pattern = '/[sn]* (d+) /isU'; if (preg_match($pattern, $html, $matches)) { $result['sup_stock'] = trim($matches[1]); } // price $pattern = 
'/]+>(d+)+]+>.[^d]*([d.]+) /isU'; if (preg_match_all($pattern, $html, $matches)) { foreach ($matches[1] as $k=>$v) { $result['price'][$v] = '¥' . $matches[2][$k]; } } //img_url $pattern = '/Pictures if (preg_match($pattern, $html, $ matches)) { $result['img_url'] = trim($matches[1]); } // page_url if ($is_url) { $result['page_url'] = $host . $url; } return $result; } /** * Final call function * @param string $keywords Search keywords * @return array * / function getData($keywords) { $hrefList = getListHrefAll($keywords); $result = array(); foreach ($hrefList as $k= >$v) { $result[] = getDetail($v); } return $result; } // Test Script $keywords = trim($_GET['keywords']); $result = getData($keywords); print_r($result);
http://www.bkjia.com/PHPjc/728095.html www.bkjia.com true http://www.bkjia.com/PHPjc/728095.html TechArticle 复制代码 代码如下: ?php /*** HOST: www.icbase.com*/ //set_time_limit(0); // base function function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port...
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Latest Articles by Author
2024-10-22 09:46:29
2024-10-13 13:53:41
2024-10-12 12:15:51
2024-10-11 22:47:31
2024-10-11 19:36:51
2024-10-11 15:50:41
2024-10-11 15:07:41
2024-10-11 14:21:21
2024-10-11 12:59:11
2024-10-11 12:17:31