Web crawler: Python 3.4.1 requests module reports AttributeError: 'list' object has no attribute 'get'
世界只因有你 2017-06-12 09:24:16

I'm using Python to write a crawler that scrapes IP addresses. Since the site is anti-crawler, I added proxy support and use a thread pool to open 10 threads for the crawl.
However, it keeps raising AttributeError: 'list' object has no attribute 'get'.
I don't know how to fix it, so I'm posting my code.
from bs4 import BeautifulSoup
import requests
import re
import time
from multiprocessing import Pool
import pymysql
import random
from threadpool import *

# Random request headers

def randHeader():
    head_connection = ['Keep-Alive', 'close']
    head_accept = ['text/html, application/xhtml+xml, */*']
    head_accept_language = ['zh-CN,fr-FR;q=0.5', 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3']
    head_user_agent = ['Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
                       'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; rv:11.0) like Gecko)',
                       'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
                       'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3',
                       'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12',
                       'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
                       'Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.0',
                       'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
                       'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080219 Firefox/2.0.0.12 Navigator/9.0.0.6',
                       'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0)',
                       'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
                       'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET4.0C; .NET4.0E)',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Maxthon/4.0.6.2000 Chrome/26.0.1410.43 Safari/537.1 ',
                       'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET4.0C; .NET4.0E; QQBrowser/7.3.9825.400)',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0 ',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.92 Safari/537.1 LBBROWSER',
                       'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/3.0 Safari/536.11']

    header = {
        'Connection': head_connection[0],
        'Accept': head_accept[0],
        'Accept-Language': head_accept_language[1],
        'User-Agent': head_user_agent[random.randrange(0, len(head_user_agent))]
    }
    return header

def randproxy():
    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'db': 'autohome',
        'charset': 'utf8',
        # 'cursorclass': pymysql.cursors.DictCursor,
    }
    # create the connection
    list_ip = []
    connection = pymysql.connect(**config)
    cursor = connection.cursor()
    sql = 'select ip,port from can_use'
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        for row in results:
            data = {
                'ip': row[0],
                'port': row[1]
            }
            list_ip.append(data)

    except:
        print("error")
    # time.sleep(1)
    finally:
        connection.close()

    return random.choice(list_ip)

def download(url):
    proxy = randproxy()
    proxy_host = "http://" + proxy['ip'] + ":" + proxy['port']
    proxy_temp = {"http": proxy_host}
    parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)
    soup = BeautifulSoup(parse_url.text,'lxml')
    pre_proxys = soup.find('table', id='ip_list').find_all('tr')
    for i in pre_proxys[1:]:
        try:
            td = i.find_all('td')
            id = td[1].get_text()
            port = td[2].get_text()
            # execute the SQL statement
            config = {
                'host': '127.0.0.1',
                'port': 3306,
                'user': 'root',
                'password': '',
                'db': 'autohome',
                'charset': 'utf8',
                'cursorclass': pymysql.cursors.DictCursor,
            }
            # create the connection
            connection = pymysql.connect(**config)

            data = {
                'ip':id,
                'port':port,

            }
            with connection.cursor() as cursor:
                # execute the SQL statement, inserting the record
                sql = 'INSERT INTO proxyip (ip,port) VALUES (%s,%s)'
                cursor.execute(sql, (data['ip'],data['port']))
                # autocommit is not enabled by default; commit explicitly to save the executed statement
            connection.commit()

        except:
            print("error")
        # time.sleep(1)
        finally:
            connection.close()
    time.sleep(2)

def proxy_url_list():
    url = "http://www.xicidaili.com/wt/{}"
    url_list = []
    for i in range(1,1387):
        new_url = url.format(i)
        url_list.append(new_url)

    return url_list

if __name__ == "__main__":
    pool = ThreadPool(2)
    requests = makeRequests(download,proxy_url_list())
    [pool.putRequest(req) for req in requests]
    pool.wait()
    # url = "http://www.baidu.com"
    # proxy = randproxy()
    # proxy_host = "http://" + proxy['ip'] + ":" + proxy['port']
    # proxy_temp = {"http": proxy_host}
    # test = requests.get(url,headers=randHeader(),timeout=10,proxies=proxy_temp)
    # soup = BeautifulSoup(test.text,'lxml')
    # print(soup)

I can't post images, so I can only paste the error message:
  File "C:\Python\lib\site-packages\threadpool.py", line 158, in run
result = request.callable(*request.args, **request.kwds)

Fail "C:/qichezhijia/proxyspider.py", baris 80, dalam muat turun

parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)

AttributeError: objek 'list' tidak mempunyai atribut 'get'
Traceback (panggilan terbaharu terakhir):
Fail "C:Pythonlibsite-packagesthreadpool.py", baris 158, sedang dijalankan

result = request.callable(*request.args, **request.kwds)

Fail "C:/qichezhijia/proxyspider.py", baris 80, dalam muat turun

parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)

AttributeError: objek 'list' tidak mempunyai atribut 'get'
Traceback (panggilan terbaharu terakhir):
Fail "C:Pythonlibsite-packagesthreadpool.py", baris 158, sedang dijalankan

result = request.callable(*request.args, **request.kwds)

Fail "C:/qichezhijia/proxyspider.py", baris 80, dalam muat turun

parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)

AttributeError: objek 'list' tidak mempunyai atribut 'get'
Traceback (panggilan terbaharu terakhir):
Fail "C:Pythonlibsite-packagesthreadpool.py", baris 158, sedang dijalankan

result = request.callable(*request.args, **request.kwds)

Fail "C:/qichezhijia/proxyspider.py", baris 80, dalam muat turun

parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)

AttributeError: objek 'list' tidak mempunyai atribut 'get'
Traceback (panggilan terbaharu terakhir):
Fail "C:Pythonlibsite-packagesthreadpool.py", baris 158, sedang dijalankan

result = request.callable(*request.args, **request.kwds)

Fail "C:/qichezhijia/proxyspider.py", baris 80, dalam muat turun

parse_url = requests.get(url[0],headers=randHeader(),timeout=12,proxies=proxy_temp)

AttributeError: objek 'senarai' tidak mempunyai atribut 'dapat'

All replies (3)
女神的闺蜜爱上我

What does makeRequests do? Have you assigned `requests` to a list somewhere?

代言

makeRequests is similar to Python's map function: it takes two parameters (a function and a list), and each entry in the list is fed to that function to be processed...
Either the `requests` inside it clashes with the requests module (they have the same name), or the way `url[0]` is written is wrong; go back and debug it...
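
To see why that name clash produces this exact error, here is a minimal standalone sketch (not the original script; the list assignment stands in for `requests = makeRequests(...)`):

import requests

def download(url):
    # Python resolves the global name "requests" when the function runs,
    # not when it is defined, so a later rebinding breaks this call.
    return requests.get(url, timeout=12)

# Stand-in for `requests = makeRequests(download, proxy_url_list())`:
# the global name now points at a plain list, shadowing the imported module.
requests = ['work', 'requests']

download("http://example.com")
# AttributeError: 'list' object has no attribute 'get'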

三叔

The name clash is the problem. I suggest changing these lines:

requests = makeRequests(download,proxy_url_list())
[pool.putRequest(req) for req in requests]

to:
myrequests = makeRequests(download,proxy_url_list())
[pool.putRequest(req) for req in myrequests]

and trying again.
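
For completeness, a minimal sketch of the main block with that rename applied, everything else unchanged. One caveat worth checking as well: if I remember threadpool's makeRequests correctly, it hands each non-tuple list entry to the callable as its single argument, so `url` inside `download` would already be a plain URL string, making `url[0]` just its first character; `requests.get(url, ...)` is probably what was intended.

if __name__ == "__main__":
    pool = ThreadPool(2)
    # A name that does not shadow the imported requests module.
    work_requests = makeRequests(download, proxy_url_list())
    for req in work_requests:
        pool.putRequest(req)
    pool.wait()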
