网页爬虫 - 为什么用 Python 模拟登录 App Annie 时一直返回 503 状态码?
高洛峰
高洛峰 2017-04-18 10:33:46
0
1
728
#-*-encoding:utf-8-*-
import requests,  xlwt, sys
from bs4 import BeautifulSoup

# Python 2 only: switch the implicit str/unicode codec to UTF-8 (presumably so
# that non-ASCII page content can be printed without UnicodeEncodeError).
# Neither the `reload` builtin nor `sys.setdefaultencoding` exists on
# Python 3, so the hack is skipped there instead of crashing at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - re-exposes sys.setdefaultencoding on Python 2
    sys.setdefaultencoding('utf-8')

referer = "https://www.appannie.com/account/login/?_ref=header"
user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')

# Browser-like request headers used for the requests to www.appannie.com.
header = {
    "User-Agent": user_agent,
    "Referer": referer,
    "Host": "www.appannie.com",
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain,*/*',
    # 'sdch' is deliberately not advertised: requests transparently decodes
    # gzip and deflate only, so an SDCH-encoded body would come back unreadable.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'X-NewRelic-ID': 'VwcPUFJXGwEBUlJSDgc=',
    'X-Requested-With': 'XMLHttpRequest',
}


def main():
    """Log in to App Annie and print the Google Play top-chart page.

    Steps:
      1. GET the login page so the server sets the ``csrftoken`` cookie.
      2. POST the credentials together with that token.
      3. GET the top-chart page for a fixed date and print its parsed HTML.

    Raises:
        KeyError: if the login page did not set a ``csrftoken`` cookie.
        Exception: if the login POST does not return a 2xx status code.
    """
    url = 'https://www.appannie.com/account/login/'

    s = requests.Session()
    # Install the browser-like headers on the session itself so that EVERY
    # request carries them. Previously the login POST was sent without them,
    # i.e. with the default python-requests User-Agent and no Referer.
    s.headers.update(header)

    # The first GET only serves to obtain the CSRF cookie.
    s.get(url)
    key = s.cookies['csrftoken']

    data = {
        'csrfmiddlewaretoken': key,
        'next': '/dashboard/home/',
        'username': '1195615991@qq.com',
        'password': 'xxxxx',
    }
    req = s.post(url, data=data)
    # Floor division keeps the 2xx check correct on Python 3 as well, where
    # `/` would yield a float such as 2.0 for 200 but 2.01 for 201.
    if req.status_code // 100 != 2:
        raise Exception("Error while logging in, code: %d" % (req.status_code))

    n = '2017-04-11'
    url_1 = 'https://www.appannie.com/apps/google-play/top-chart/?country=US&category=game&device=&date={}'.format(n)

    # The session already stores every cookie received so far; passing
    # `req.cookies` explicitly is unnecessary and would only contain the
    # cookies set by that single response.
    req_1 = s.get(url_1).content
    soup = BeautifulSoup(req_1, 'lxml')
    # Parenthesised form prints identically on Python 2 and is valid Python 3.
    print(soup)

# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
高洛峰
高洛峰

拥有18年软件开发和IT教学经验。曾任多家上市公司技术总监、架构师、项目经理、高级软件工程师等职务。 网络人气名人讲师,...

reply all(1)
刘奇
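两个关键点:
1. 请求头(headers)里要带上浏览器的 User-Agent,并且登录的 POST 请求也必须带上(最简单的做法是直接设置在 session.headers 上,这样该 session 发出的每个请求都会携带)
2. 登录表单里要提交 csrfmiddlewaretoken 参数,其值取自先访问一次登录页后得到的 csrftoken cookie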
# coding: utf-8
"""Minimal App Annie login: browser User-Agent on the session plus CSRF token."""

import requests

# NOTE(review): the question's code uses this URL with a trailing slash.
# Confirm the slash-less form is not answered with a redirect; requests
# re-sends a redirected POST as GET on 301/302, which would drop the form data.
url = 'https://www.appannie.com/account/login'

session = requests.Session()
# Set on the session so that both the GET and the POST below carry it.
session.headers['user-agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'

# The first request only serves to receive the csrftoken cookie.
session.get(url)

token = session.cookies.get('csrftoken')
if token is None:
    # Without this check requests would silently drop the None-valued field
    # and the login POST would go out with no CSRF token at all.
    raise RuntimeError('login page did not set a csrftoken cookie')

data = {
    'csrfmiddlewaretoken': token,
    'next': '/dashboard/home/',
    'username': 'XXXX',
    'password': 'XXXX',
}

r = session.post(url, data)
# Parenthesised form prints identically on Python 2 and is valid Python 3.
print(r.status_code)
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template