/**
 * Crawl a Taobao or Tmall item page and collect basic product data.
 *
 * The item page itself is fetched through the project's \Curl wrapper; price
 * and stock data are then requested from a JSONP endpoint (detailskip for
 * URLs containing "taobao.com", mdskip otherwise) using the item page as the
 * referer.
 *
 * @param string $url Item detail page URL.
 *
 * @return array|false False when the page contains the "此宝贝已下架" marker;
 *                     otherwise an array with the keys 'title', 'price',
 *                     'act_price', 'stock' and 'banners'. Values that cannot
 *                     be extracted are null (or an empty array for 'banners').
 */
function crawlUrl($url)
{
    import('PhpQuery.Curl');

    $curl = new \Curl();
    $page = $curl->read($url);
    $rawContent = isset($page['content']) ? (string) $page['content'] : '';
    $content = (string) mb_convert_encoding($rawContent, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');

    // Both site variants use the same marker text, so check it once up front.
    if (strpos($content, '此宝贝已下架') !== false) {
        return false;
    }

    // Return the first capture group of $pattern in $subject, or '' when there is no match.
    $firstGroup = function ($pattern, $subject) {
        return preg_match($pattern, $subject, $m) === 1 ? $m[1] : '';
    };

    // Walk a nested array along $path; null as soon as a level is missing.
    $dig = function ($data, array $path) {
        foreach ($path as $key) {
            if (!is_array($data) || !array_key_exists($key, $data)) {
                return null;
            }
            $data = $data[$key];
        }
        return $data;
    };

    // First element of an array (false when empty, as current() does); null for non-arrays.
    $firstOf = function ($value) {
        return is_array($value) ? current($value) : null;
    };

    // GET $endpoint with the item page as referer; '' when the transfer fails.
    $fetch = function ($endpoint) use ($url) {
        $ch = curl_init();
        if ($ch === false) {
            return '';
        }
        curl_setopt_array($ch, array(
            CURLOPT_URL            => $endpoint,
            CURLOPT_HEADER         => false,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_TIMEOUT        => 300,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11',
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_REFERER        => $url,
            // NOTE(review): peer verification is disabled, as in the original code.
            // This allows man-in-the-middle tampering; enable it once a CA bundle
            // is confirmed to be available on the deployment hosts.
            CURLOPT_SSL_VERIFYPEER => false,
        ));
        $body = curl_exec($ch);
        curl_close($ch);

        return is_string($body) ? $body : '';
    };

    // Decode a JSONP body ("callback({...});") into an array; array() on failure.
    // Only the outer wrapper is removed, so parentheses inside JSON strings survive.
    $decodeJsonp = function ($body) {
        $body = trim($body);
        if ($body !== '' && $body[0] !== '{' && $body[0] !== '[') {
            $open = strpos($body, '(');
            $close = strrpos($body, ')');
            if ($open !== false && $close !== false && $close > $open) {
                $body = substr($body, $open + 1, $close - $open - 1);
            }
        }
        $decoded = json_decode(trim($body), true);

        return is_array($decoded) ? $decoded : array();
    };

    // Collect the <img src> values inside the thumbnail list matched by $listPattern.
    $extractBanners = function ($listPattern) use ($content) {
        if (preg_match($listPattern, $content, $list) !== 1) {
            return array();
        }
        preg_match_all('/<img src="(.*?)" \//', $list[1], $images);

        return isset($images[1]) ? $images[1] : array();
    };

    $title = $firstGroup("|<title>(.*)</title>|isU", $content);
    $myres = array();

    // Strict comparison: a match at offset 0 must still count as a match.
    if (strpos($url, 'taobao.com') !== false) {
        $itemId = $firstGroup("|itemId : '(.*)'|isU", $content);
        $sellerId = $firstGroup("|sellerId : '(.*)'|isU", $content);

        $apiUrl = 'https://detailskip.taobao.com/service/getData/1/p1/item/detail/sib.htm?itemId='
            . rawurlencode($itemId)
            . '&sellerId='
            . rawurlencode($sellerId)
            . '&modules=dynStock,qrcode,viewer,price,duty,xmpPromotion,delivery,upp,activity,fqg,zjys,amountRestriction,couponActivity,soldQuantity,originalPrice,tradeContract&callback=onSibRequestSuccess';

        $data = $decodeJsonp($fetch($apiUrl));

        $myres['title'] = str_replace('-淘宝网', '', $title);
        $myres['price'] = $firstOf($dig($data, array('data', 'originalPrice')));
        $myres['act_price'] = $firstOf($dig($data, array('data', 'promotion', 'promoData')));
        $myres['stock'] = $dig($data, array('data', 'dynStock', 'stock'));
        $myres['banners'] = $extractBanners('|<ul id="J_UlThumb" class="tb-thumb tb-clearfix">(.*)</ul>|isU');

        return $myres;
    }

    // Take the numeric "id" query parameter wherever it appears in the URL,
    // instead of relying on a fixed 12-character width or a trailing "&spm".
    $itemId = $firstGroup('/[?&]id=(\d+)/', $url);

    $apiUrl = 'https://mdskip.taobao.com/core/initItemDetail.htm?cachedTimestamp=1500562177777&queryMemberRight=true&cartEnable=true&offlineShop=false&addressLevel=2&itemId='
        . $itemId
        . '&tryBeforeBuy=false&isAreaSell=false&tmallBuySupport=true&isPurchaseMallPage=false&household=false&isForbidBuyItem=false&service3C=false&isRegionLevel=false&showShopProm=false&isSecKill=false&sellerPreview=false&isUseInventoryCenter=false&isApparel=true&callback=setMdskip&timestamp=1500562172109&isg=AiUlDZFWmP/sMgVurQSILU3Ytet/Zdis&isg2=Ajk51JIhRFqKzxmiNPP6dkYxSKXT7iySkzSTeVtu9WDf4ll0o5Y9yKdyEtHu';

    // The response is converted to UTF-8 before decoding because json_decode() only accepts UTF-8.
    $body = (string) mb_convert_encoding($fetch($apiUrl), 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
    $data = $decodeJsonp($body);

    // Use the first SKU listed in skuQuantity as the reference SKU for price lookups.
    $skuKey = '';
    $skuStock = null;
    $skuQuantity = $dig($data, array('defaultModel', 'inventoryDO', 'skuQuantity'));
    if (is_array($skuQuantity)) {
        foreach ($skuQuantity as $key => $value) {
            $skuKey = $key;
            $skuStock = $value;
            break;
        }
    }

    $priceEntry = $dig($data, array('defaultModel', 'itemPriceResultDO', 'priceInfo', $skuKey));
    $totalQuantity = $dig($data, array('defaultModel', 'inventoryDO', 'totalQuantity'));

    $myres['title'] = str_replace('-tmall.com天猫', '', $title);
    $myres['price'] = $dig($priceEntry, array('price'));
    // Prefer the promotion list when present; otherwise expose the whole price entry.
    $myres['act_price'] = (is_array($priceEntry) && isset($priceEntry['suggestivePromotionList']))
        ? $priceEntry['suggestivePromotionList']
        : $priceEntry;
    // Fall back to the first SKU's quantity when the total is missing or zero.
    $myres['stock'] = $totalQuantity ? $totalQuantity : $dig($skuStock, array('quantity'));
    $myres['banners'] = $extractBanners('|<ul id="J_UlThumb" class="tb-thumb tm-clear">(.*)</ul>|isU');

    return $myres;
}