# -*- coding: utf-8 -*-
import requests, xlwt, sys
from bs4 import BeautifulSoup

# Python 2: force the default string encoding to UTF-8 for the Chinese titles
reload(sys)
sys.setdefaultencoding('utf-8')

# Pretend to be a desktop Chrome browser
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}
def main():
    url = 'https://play.google.com/store/apps/category/GAME/collection/topselling_free?hl=zh-TW'
    data = {
        'start': '0',
        'num': '100',
    }
    names = []
    links = []

    # Fetch the top-selling free games page and parse it
    req = requests.post(url, headers=header, data=data).content
    soup = BeautifulSoup(req, 'html.parser')

    # Each app entry sits in a <p class="details"> block
    titles = soup.find_all('p', {'class': 'details'})
    for title in titles:
        name = title.find('a', {'class': 'title'}).get('title')
        host_url = title.find('a', {'class': 'title'}).get('href')
        print name, host_url
        names.append(name)
        links.append(host_url)

    # Write app names to column 0 and their URLs to column 1
    for i in range(len(names)):
        sheet.write(i, 0, names[i])
    for n in range(len(links)):
        sheet.write(n, 1, links[n])
if __name__ == '__main__':
    wb = xlwt.Workbook()
    sheet = wb.add_sheet("top_100")
    main()
    wb.save('gametop100.xls')
# login()
The server can detect where the request ultimately comes from, so setting a proxy may not help.
Or you need a deeper (higher-anonymity) proxy.
Your crawler doesn't set a proxy at all, does it?
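If you do want to try a proxy, requests accepts a proxies mapping per request. Below is a minimal sketch under the assumption of a local HTTP proxy at 127.0.0.1:8080; that address and the timeout are placeholders, not part of the original script.

# Minimal sketch: the same POST routed through a proxy.
# 127.0.0.1:8080 is a hypothetical proxy address -- replace with your own.
import requests

url = 'https://play.google.com/store/apps/category/GAME/collection/topselling_free?hl=zh-TW'
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}
data = {'start': '0', 'num': '100'}
proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}
resp = requests.post(url, headers=header, data=data, proxies=proxies, timeout=10)
print(resp.status_code)

Note that a transparent proxy typically still reveals the client through headers such as X-Forwarded-For or Via; the "deeper proxy" suggestion above presumably means a high-anonymity (elite) proxy that strips those headers.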