用正则表达式,抓取京东的商品列表
保存到服务器
抓取文件
<!-- Scraper script -->
<?php
/**
 * Scrapes a JD.com product list page with regular expressions and stores
 * one row per product (image URL + text) in the `fridge_list` table.
 *
 * Depends on helpers from function.php (not visible in this file):
 * curl_data(), insert() and select().
 */
require 'function.php';

$url = 'https://list.jd.com/list.html?cat=737,794,878';
$data = curl_data($url);

// Each product sits between <li class="gl-item"> and <i class="promo-words">.
// "s" lets "." span newlines, "i" makes the match case-insensitive.
$itemPattern = '/<li class="gl-item">(.*?)<i class="promo-words">/si';

// Greedy ".*" deliberately picks the LAST src="..." on the line; the capture
// itself stops at the next double quote so trailing attributes are not
// swallowed into the URL.
$imgPattern = '/<img.*src="([^"]+)"/i';

// Group 2 holds the inner text of every <em>...</em> in the item.
$textPattern = '/(<em>)(.*?)<\/em>/si';

// The original script reads the third <em> of each item.
// NOTE(review): presumably that is the product title on this page layout - confirm.
$textIndex = 2;

$contents = [];

// preg_match_all() needs a string subject; skip parsing when the download
// did not produce one (curl_data() failure behaviour is not visible here).
if (is_string($data) && $data !== '') {
    if (preg_match_all($itemPattern, $data, $matches) > 0) {
        // Lazy-loaded images keep their URL in data-lazy-img; rename the
        // attribute so one image pattern covers both eager and lazy images.
        $contents = str_replace('data-lazy-img', 'src', $matches[0]);
    }
}

foreach ($contents as $content) {
    preg_match_all($imgPattern, $content, $imgMatches);
    preg_match_all($textPattern, $content, $textMatches);

    // Skip items that lack an image or have fewer than three <em> elements
    // instead of reading undefined offsets and inserting empty rows.
    if (!isset($imgMatches[1][0], $textMatches[2][$textIndex])) {
        continue;
    }

    $fridgeImg = trim($imgMatches[1][0]);
    $fridgeText = trim($textMatches[2][$textIndex]);

    if ($fridgeImg === '' || $fridgeText === '') {
        continue;
    }

    insert('fridge_list', [
        'fridge_img' => $fridgeImg,
        'fridge_text' => $fridgeText,
    ]);
}

// Per the original author's note: find() fetches a single row, select()
// fetches all rows. Dump everything stored so far for inspection.
$data2 = select('fridge_list', 'fridge_id,fridge_img,fridge_text');
print_r($data2);
点击 "运行实例" 按钮查看在线实例
显示文件
<!-- Display page -->
<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport"
          content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>冰箱</title>
    <style>
        * {
            margin: 0;
            padding: 0;
        }

        h2 {
            text-align: center;
            margin: 20px 0;
            color: green;
        }

        ul {
            width: 1000px;
            margin: 0 auto;
            text-align: center;
            list-style: none;
            overflow: hidden;
        }

        li {
            float: left;
            width: 300px;
            height: 280px;
            display: block;
            padding: 15px;
            border: 1px solid lightgray;
        }

        li a {
            float: left;
            font-size: 15px;
            text-decoration: none;
        }
    </style>
</head>
<body>
<h2>冰箱列表</h2>
<ul>
    <?php
    // Renders one <li> (image + caption link) per row of the fridge_list table.
    // select() comes from function.php, which is not visible in this file.
    require 'function.php';

    $contents = select('fridge_list', 'fridge_img,fridge_text');

    // NOTE(review): select() presumably returns an array of rows; guard in
    // case it yields false/null on failure or when the table is empty.
    if (is_array($contents)) {
        foreach ($contents as $content) {
            // The stored values were scraped from a third-party page, so
            // treat them as untrusted: escape for HTML, including single
            // quotes because the src attribute is single-quoted.
            $img = htmlspecialchars((string) $content['fridge_img'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $text = htmlspecialchars((string) $content['fridge_text'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

            echo "<li><img src='{$img}'>" . "<a href='#'>{$text}</a></li>";
        }
    }
    ?>
</ul>
</body>
</html>
点击 "运行实例" 按钮查看在线实例