from pyspider.libs.base_handler import *
class Handler(BaseHandler):
    """Crawl the Weibo mobile site (weibo.cn) as a logged-in user.

    Login is carried entirely by the session cookies in ``crawl_config``;
    there is no login flow here. NOTE(review): these cookie values are a
    captured session and will expire — refresh them from a live browser
    session when the crawl starts returning the logged-out page.
    """

    user_url = "http://weibo.cn/"

    crawl_config = {
        'itag': 'v1',
        'headers': {
            # Mobile UA so weibo.cn serves the lightweight mobile pages.
            'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
            "Host": "weibo.cn",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh-TW;q=0.8,zh-HK;q=0.6,en-US;q=0.4,en;q=0.2",
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive"
        },
        # BUG FIX: the original keys were written as " ALF", " SCF", … with a
        # leading space (left over from splitting a Cookie header on ";"
        # instead of "; "). Cookie names must match exactly, so the server
        # ignored the session cookies and always served the logged-out page.
        'cookies': {
            "_T_WM": "791e0d5962c38c757bead1a106a4dcc5",
            "ALF": "1489404939",
            "SCF": "AmNkSminRmi2L6WiP0tbn2H_p-TOZQIRRTLwEL5OhwHOohm56wHRk_9Jy1w7iXftduUAJihNuU3B-8cYnWBT3Lk.",
            "SUB": "_2A251modoDeRxGeNG7VEV9ibIyT6IHXVXZCkgrDV6PUJbktBeLXXZkW2HyTgXsruSYnviSU7hXUjfdGTOig..",
            "SUBP": "0033WrSXqPxfM725Ws9jqgMF55529P9D9W5wqBjzJ2m1XohsTfpMwPVx5JpX5o2p5NHD95Qf1hq0ShqRShzEWs4Dqcjci--fi-i8iK.7i--fi-2Xi-2Ni--fi-2Xi-2Ni--fi-2Xi-2Ni--fi-2Xi-2Ni--fi-zRiKnf",
            "SUHB": "0tKqtAdy5rivMy",
            "SSOLoginState": "1486812984"
        }
    }

    # Re-run the entry point every 60 days (minutes = 60 * 24 * 60).
    @every(minutes=60 * 24 * 60)
    def on_start(self):
        """Seed the crawl with the Weibo mobile home page."""
        self.crawl(Handler.user_url, callback=self.index_page, method="GET")

    # Cache the fetched page for one day before re-fetching.
    @config(age=1 * 24 * 60 * 60)
    def index_page(self, response):
        """Dump the fetched HTML so login state can be inspected manually."""
        print(response.text)
用 requests 可以实现登录效果,但用 pyspider 抓到的一直是默认的未登录页面。
无论在 crawl_config 里还是在 self.crawl 调用中设置 cookies 都不起作用。
I'd recommend using selenium for this: Weibo's login apparently goes through its JavaScript, and I don't know of a way to verify the login state with pyspider alone.
Is pyspider running on the same machine?
Check whether the leading space in your cookie names matters — cookie names must match exactly, so " ALF" is not the same as "ALF". Send a request to http://httpbin.org/get with both pyspider and requests and compare what each actually transmits.
In addition: in version 0.3.8, crawl_config does not take effect while debugging in the web UI.