在 Python 中,如何取得短鏈對應的最終網址?現在有很多淘寶短鏈,我需要取得短鏈跳轉到的最終網址,有什麼好辦法嗎?有的是 302 重定向,有的是直接在網頁上用 JS 跳轉,這些要如何取得?
拥有18年软件开发和IT教学经验。曾任多家上市公司技术总监、架构师、项目经理、高级软件工程师等职务。 网络人气名人讲师,...
用 Selenium + PhantomJS...
http://stackoverflow.com/ques...
#!/usr/bin/python2.7
# Resolve shortened URLs concurrently with Twisted.
#
# Each URL is probed with a single HEAD request; the value of the response's
# ``Location`` header (or the text of any error) is stored in the module-level
# ``locations`` dict, keyed by the URL that was requested.
#
# NOTE: only the first hop is inspected. Redirect chains are not followed and
# redirects performed by JavaScript in the page body cannot be seen here,
# because nothing but the response headers is read.
from twisted.internet import reactor
from twisted.internet.defer import Deferred, DeferredList, DeferredLock
from twisted.internet.defer import inlineCallbacks
from twisted.web.client import Agent, HTTPConnectionPool
from twisted.web.http_headers import Headers
from pprint import pprint
from collections import defaultdict
try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3
from random import randrange
import fileinput

# One shared pool so connections to the same host are reused.
pool = HTTPConnectionPool(reactor)
pool.maxPersistentPerHost = 16
agent = Agent(reactor, pool)

# Locks are created lazily, keyed by (netloc, slot number).
locks = defaultdict(DeferredLock)

# url -> first ``Location`` header value, None if the header is absent,
# or the error message if the request failed.
locations = {}


def getLock(url, simultaneous = 1):
    """Return one of the ``simultaneous`` locks belonging to the host of *url*.

    The slot is picked at random, so at most ``simultaneous`` holders can be
    active for a given host at once; because the choice is random, callers
    may queue on a busy slot while another slot is idle.
    """
    return locks[urlparse(url).netloc, randrange(simultaneous)]


@inlineCallbacks
def getMapping(url):
    """Issue a HEAD request for *url* and record the result in ``locations``.

    On success ``locations[url]`` is the first ``Location`` header value, or
    None when the response carries no such header. On any failure it is the
    string form of the exception instead, so one bad URL does not abort the
    rest of a batch.
    """
    # Limit ourselves to 4 simultaneous connections per host.
    # Tweak this as desired, but make sure that it is no larger than
    # pool.maxPersistentPerHost
    lock = getLock(url, 4)
    yield lock.acquire()
    try:
        # Agent.request takes bytes for both method and URI. On Python 2 a
        # plain str already is bytes, so this only converts text URLs.
        # Encoding happens inside the try so that a non-ASCII URL is recorded
        # as an error like any other failure.
        target = url if isinstance(url, bytes) else url.encode('ascii')
        resp = yield agent.request(b'HEAD', target)
        locations[url] = resp.headers.getRawHeaders('location', [None])[0]
    except Exception as e:
        locations[url] = str(e)
    finally:
        # Always free the slot, even when the request failed.
        lock.release()
而且可以試試pip包
https://pypi.python.org/pypi/...
from urlunshort import resolve
resolve("http://bit.ly/qlKaI")
结果:'http://bitbucket.org/runeh/urlunshort/'
用 Selenium + PhantomJS...
http://stackoverflow.com/ques...
而且可以試試pip包
https://pypi.python.org/pypi/...