Home > Backend Development > PHP Tutorial > Share the page keyword capture components.arrow.com site code_PHP tutorial

Share the page keyword capture components.arrow.com site code_PHP tutorial

WBOY
Release: 2016-07-13 10:39:49
Original
862 people have browsed it

Copy code. The code is as follows:

 /**
 * HOST: components.arrow.com
 */
 //set_time_limit(0);
 // base function
 function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
 {
 $ch = curl_init();
 if (!empty($data)) {
 $data = is_array($data)?http_build_query($data): $data;
 $url .= (strpos($url,'?')? '&': "?") . $data;
 }
 curl_setopt($ch, CURLOPT_URL, $url);
 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
 curl_setopt($ch, CURLOPT_POST, 0);
 curl_setopt($ch, CURLOPT_PORT, $port);
 curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); //是否抓取跳转后的页面
 $reffer && curl_setopt($ch, CURLOPT_REFERER, $reffer);
 if($proxy) {
 curl_setopt($ch, CURLOPT_PROXY, $proxy);
 curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
 curl_setopt($ch, CURLOPT_PROXYUSERPWD,"andhm001:andhm123");
 }

$result = array();
 $result['result'] = curl_exec($ch);
 if (0 != curl_errno($ch)) {
 $result['error'] = "Error:n" . curl_error($ch);

}
 curl_close($ch);
 return $result;
 }

Copy code. The code is as follows:

/**
 * Send an HTTP POST request through cURL.
 *
 * @param string       $url     Request URL.
 * @param array|string $data    Request body, handed to CURLOPT_POSTFIELDS unchanged.
 * @param array        $header  Header lines for CURLOPT_HTTPHEADER; not set when empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @return array 'result' is the return value of curl_exec(); 'error' is present only when curl_errno() is non-zero.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 15, $port = 80)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        // "\n" (newline) - the listing had lost the backslash and read "Error:n".
        $result['error'] = "Error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

/**
 * Get the html source code of the list page
 *
 * Posts the search form to http://components.arrow.com/part/search/<keywords>
 * through curl_post(), asking for 100 records beginning at $start.
 *
 * @param string $keywords Search keywords
 * @param int $start Number of starting records
 * @return boolean|string false when $start is negative or the request reports an error, otherwise the 'result' value returned by curl_post()
 */
function getListHtml($keywords, $start = 0)
{
    if ($start < 0) {
        return false;
    }

    $postData = array(
        'search_token' => $keywords,
        'start' => $start,
        'limit' => 100,
    );

    // The keywords become a URL path segment, so they have to be percent-encoded:
    // a space, '/', '?' or '#' would otherwise produce a malformed or different URL.
    $url = 'http://components.arrow.com/part/search/' . rawurlencode($keywords);
    $result = curl_post($url, http_build_query($postData));
    if (isset($result['error'])) {
        return false;
    }

    return $result['result'];
}

/**
 * Collect the link targets captured from a list page.
 *
 * NOTE(review): the pattern below looks truncated - it contains a closing ")" with no
 * opening "(" - presumably the leading HTML markup was lost when the listing was copied.
 * It is reproduced unchanged here; restore it from the original script before relying on it.
 *
 * @param string $html html source code
 * @return array the first capture group of every match, or an empty array when nothing matches
 */
function getListHref($html)
{
    $found = array();
    $hits = preg_match_all('/]+)">/isU', $html, $found);

    // no matches: hand back an empty list
    return $hits ? $found[1] : array();
}

/**
 * Get the next page number start
 *
 * Looks for the inline call buildPagination('a','b','c',d); in the page and
 * reads its third argument.
 *
 * @param string $html html source code
 * @return number the third buildPagination() argument as an integer, or -1 when the call is not found
 */
function getListNextPage($html)
{
    // The parentheses and quotes are regex/PHP metacharacters and "\d" is the digit
    // class; the listing had lost every backslash, which ended the string literal
    // at the first inner quote and made the file unparsable.
    $pattern = '/buildPagination\(\'\d+\',\'\d+\',\'(\d+)\',\d+\);/isU';
    if (preg_match($pattern, $html, $matches)) {
        return intval($matches[1]);
    }

    return -1;
}

/**
 * Get the list and all detailed lists
 *
 * Fetches the first list page for the keywords, then keeps requesting the
 * offset reported by getListNextPage() and merges the links of every page.
 *
 * @param string $keywords Search keywords
 * @return boolean|array false when $keywords is empty, otherwise the collected links (empty array when the first page yields none)
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if (false === $html) {
        // the first page could not be fetched, so there is nothing to parse
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // no results
        return array();
    }

    // Offsets already requested (the first page is offset 0). A page that points
    // back to an offset seen before would otherwise keep the loop running forever.
    $visited = array(0 => true);
    $nextPage = getListNextPage($html);
    while ($nextPage > 0 && !isset($visited[$nextPage])) {
        $visited[$nextPage] = true;

        $html = getListHtml($keywords, $nextPage);
        if (false === $html) {
            // keep what has been collected so far
            break;
        }

        $hrefList = array_merge($hrefList, getListHref($html));
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}

/**
* Get details page information
*
* The given path is appended to http://components.arrow.com, downloaded with curl_get(),
* and a series of regular expressions fill the fields of the $result array; page_url is
* set to the full URL that was requested.
*
* NOTE(review): this listing appears to have been damaged when it was copied from a web
* page: the patterns read "[sn]", "s" and "d" where whitespace and digit escapes were
* presumably meant, the HTML tags inside most patterns seem to be missing, and the pdf_url
* and img_url patterns run straight into the following "if". The code is left untouched
* here; restore the patterns from the original script before use.
* NOTE(review): 'para' and 'price' start as '' but are later written with array keys;
* confirm they should be initialised as arrays.
* NOTE(review): a page without a "Part No" match writes page.txt and calls die('xxx'), so
* the "return array()" that follows is never reached; this looks like leftover debugging.
* NOTE(review): str_replace('&', '&', ...) replaces a string with itself; presumably an
* HTML entity was decoded by the copy - confirm.
*
* @param string $url url address (path that is appended to http://components.arrow.com)
* @return array|false false when $url is empty, array() when curl_get() reports an error, otherwise the field array
*/
 function getDetail($url)
 {
 if ( empty($url) )
 {
 return false;
 }
 $host = 'http://components.arrow.com';

$url = $host . $url;
 $result = curl_get($url);
 if ( isset($result['error']) )
 {
 return array();
 //exit($result['error']);
 }
 $html = $result['result'];

$result = array(
 'sup_part' => '', // supplier part number
 'sup_id' => '', // supplier ID
 'mfg_part' => '', // manufacturer part number
 'mfg_name' => '', // manufacturer name
 'cat_name' => '', // category name
 'para' => '', // attributes
 'desc' => '', // description
 'pdf_url' => '', // PDF URL
 'sup_stock' => '', // stock
 'min_purch' => '', // minimum order quantity
 'price' => '', // price
 'img_url' => '', // image URL
 'createtime' => '', // creation time
 'datacode' => '', // batch number
 'package' => '', // package
 'page_url' => '', // page URL
 );

// mfg_part
 $pattern = '/
  • [sn]*Part No:s*(.+)
  • /isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['mfg_part'] = trim($matches[1]);
     }else {file_put_contents('page.txt', $html);die('xxx');
     return array();
     }

    // mfg_name
     $pattern = '/
  • [sn]*Manufacturer: (.+)
  • /isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['mfg_name'] = trim($matches[1]);
     }

    // cat_name
     $pattern = '/displayCategory('(.[^']+)');/isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['cat_name'] = trim($matches[1]);
     $result['cat_name'] = str_replace('|', '>', $result['cat_name']);
     }

    // para
     $tablepattern = '/]*>(.+)/isU';
     if (preg_match($tablepattern, $html, $matches))
     {
     $pattern = '/[sn]*(.+)(.+)[sn]*/isU';
     if (preg_match_all($pattern, $matches[1], $matches))
     {
     foreach($matches[1] as $k=>$v)
     {
     $v = trim($v);
     if ('Package Type' == $v)
     {
     $result['package'] = trim($matches[2][$k]);
     continue;
     }
     $result['para'][$v] = trim($matches[2][$k]);
     }
     }
     }

    // desc
     $pattern = '/.+

    (.+)

    [sn]*
    /isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['desc'] = trim($matches[1]);
     }

    // pdf_url
     $pattern = '/[sn]*Datasheet: if (preg_match($pattern, $html, $matches))
     {
     $result['pdf_url'] = $host . trim($matches[1]);
     }

    // sup_stock
     $pattern = '/([d,]+)/isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['sup_stock'] = trim($matches[1]);
     $result['sup_stock'] = str_replace(',', '', $result['sup_stock']);
     }

    // min_purch
     $pattern = '/[sn]*Multiple:s*(.+)/isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['min_purch'] = trim($matches[1]);
     }

    // price
     $pattern = '/(.[^<]+)
    /isU';
     if (preg_match($pattern, $html, $matches))
     {
     $result['price'][1] = trim($matches[1]);
     }
     $pattern = '/[sn]*]+title="(.[^"]+)">/isU';
     if (preg_match($pattern, $html, $matches))
     {
     $priceurl = str_replace('&', '&', $matches[1]);
     $json = curl_get($priceurl);
     $json = $json['result'];
     if (! empty($json))
     {
     $jsonresult = json_decode($json, true);
     foreach ($jsonresult['parts'][0]['webprice']['resale'] as $k=>$v)
     {
     $result['price'][$v['minqty']] = $v['price'];
     }
     }
     }

    // img_url
     $pattern = '/[sn]* if (preg_match($pattern, $html, $matches))
     {
     $result['img_url'] = trim($matches[1]);
     }

    // page_url
     $result['page_url'] = $url;

    return $result;
     }

    /**
     * Final call function
     *
     * Resolves the keywords to detail-page links with getListHrefAll() and
     * runs getDetail() on each link, in order.
     *
     * @param string $keywords Search keywords
     * @return array one getDetail() result per link; empty when there are no links
     */
    function getData($keywords)
    {
        $hrefList = getListHrefAll($keywords);
        if (!is_array($hrefList)) {
            // getListHrefAll() returns false for empty keywords; iterating over
            // false would raise a warning instead of yielding an empty result.
            return array();
        }

        $result = array();
        foreach ($hrefList as $href) {
            $result[] = getDetail($href);
        }

        return $result;
    }

    // Test Script: read the search keywords from the query string and dump the scraped rows.
    // The isset()/is_string() guard keeps a request without ?keywords= (or with
    // keywords[]=...) from raising an undefined-index notice or a trim() type error.
    $keywords = (isset($_GET['keywords']) && is_string($_GET['keywords'])) ? trim($_GET['keywords']) : '';
    $result = getData($keywords);

    print_r($result);

    www.bkjia.com | true | http://www.bkjia.com/PHPjc/728094.html | TechArticle | Copy code. The code is as follows: <?php /** HOST: components.arrow.com */ //set_time_limit(0); // base function function curl_get($url, $data = array(), $header = array(), $timeout = 15,...
    Related labels:
    source:php.cn
    Previous article:Examples of bitwise AND and bitwise OR operations in PHP_PHP Tutorial Next article:How to get the milliseconds of the current time in php_PHP tutorial
    Statement of this Website
    The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
    Latest Articles by Author
    Latest Issues
    Related Topics
    More>
    Popular Recommendations
    Popular Tutorials
    More>
    Latest Downloads
    More>
    Web Effects
    Website Source Code
    Website Materials
    Front End Template