新手刚开始学爬虫,不知道问题出在哪。requests 已经是最新版本,用 get 获取公司内部网页不会报错,但获取外部网站时会报下面的错误,是否是防火墙的问题?
安装了Anaconda3后,在Jupyter notebook里写入如下代码:
import requests
res = requests.get('http://www.sina.com.cn/china')
res.encoding='utf-8'
print(res.text)
ConnectionRefusedError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
82 if err is not None:
---> 83 raise err
84
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
72 sock.bind(source_address)
---> 73 sock.connect(sa)
74 return sock
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
355 else:
--> 356 conn.request(method, url, **httplib_request_kw)
357
D:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
D:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
D:\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
D:\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
D:\Anaconda3\lib\http\client.py in send(self, data)
963 if self.auto_open:
--> 964 self.connect()
965 else:
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in connect(self)
165 def connect(self):
--> 166 conn = self._new_conn()
167 self._prepare_conn(conn)
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
422 retries=self.max_retries,
--> 423 timeout=timeout
424 )
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
648 retries = retries.increment(method, url, error=e, _pool=self,
--> 649 _stacktrace=sys.exc_info()[2])
650 retries.sleep()
D:\Anaconda3\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
375 if new_retry.is_exhausted():
--> 376 raise MaxRetryError(_pool, url, error or ResponseError(cause))
377
MaxRetryError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-12-03b138755a7e> in <module>()
1 import requests
----> 2 res = requests.get('http://www.sina.com.cn/china')
3 res.encoding='utf-8'
4 print(res.text)
D:\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
68
69 kwargs.setdefault('allow_redirects', True)
---> 70 return request('get', url, params=params, **kwargs)
71
72
D:\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
54 # cases, and look like a memory leak in others.
55 with sessions.Session() as session:
---> 56 return session.request(method=method, url=url, **kwargs)
57
58
D:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
D:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
D:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
485 raise ProxyError(e, request=request)
486
--> 487 raise ConnectionError(e, request=request)
488
489 except ClosedPoolError as e:
ConnectionError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))
建立連線失敗,有可能是你們的 IP 被列入黑名單,有可能是你違反了 robots.txt,也有可能就是對方不允許你的 UA 建立連線。
從報錯訊息看,被拒絕訪問了。
建議使用官方 Python,不要使用第三方安裝套件。