Connection error when running a Python script

Problem description

The code below has already been modified to handle HTTP errors, but when it runs, a connection error is raised, as shown here:

---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
~\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    156             conn = connection.create_connection(
--> 157                 (self._dns_host, self.port), self.timeout, **extra_kw
    158             )

~\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
     60 
---> 61     for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     62         af, socktype, proto, canonname, sa = res

~\Anaconda3\lib\socket.py in getaddrinfo(host, port, family, type, proto, flags)
    751     addrlist = []
--> 752     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    753         af, socktype, proto, canonname, sa = res

gaierror: [Errno 11002] getaddrinfo failed

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    671                 headers=headers,
--> 672                 chunked=chunked,
    673             )

~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    386         else:
--> 387             conn.request(method, url, **httplib_request_kw)
    388 

~\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
   1251         """Send a complete request to the server."""
-> 1252         self._send_request(method, url, body, headers, encode_chunked)
   1253 

~\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1297             body = _encode(body, 'body')
-> 1298         self.endheaders(body, encode_chunked=encode_chunked)
   1299 

~\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
   1246             raise CannotSendHeader()
-> 1247         self._send_output(message_body, encode_chunked=encode_chunked)
   1248 

~\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

~\Anaconda3\lib\http\client.py in send(self, data)
    965             if self.auto_open:
--> 966                 self.connect()
    967             else:

~\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
    183     def connect(self):
--> 184         conn = self._new_conn()
    185         self._prepare_conn(conn)

~\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    168             raise NewConnectionError(
--> 169                 self, "Failed to establish a new connection: %s" % e
    170             )

NewConnectionError: <urllib3.connection.HTTPConnection object at 0x000001F9B39F7588>: Failed to establish a new connection: [Errno 11002] getaddrinfo failed

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    719             retries = retries.increment(
--> 720                 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    721             )

~\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    435         if new_retry.is_exhausted():
--> 436             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    437 

MaxRetryError: HTTPConnectionPool(host='jackmaryetc.com', port=80): Max retries exceeded with url: /Travel/US/Colorado/images/052cat.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001F9B39F7588>: Failed to establish a new connection: [Errno 11002] getaddrinfo failed'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-42-bbe9df161c0c> in <module>
     12             RQs = 0
     13 
---> 14         response = requests.get(row[0])
     15         if response.status_code == 200:
     16             with open(filename, 'wb') as f:

~\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
     73 
     74     kwargs.setdefault('allow_redirects', True)
---> 75     return request('get', url, params=params, **kwargs)
     76 
     77 

~\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
     58     # cases, and look like a memory leak in others.
     59     with sessions.Session() as session:
---> 60         return session.request(method=method, url=url, **kwargs)
     61 
     62 

~\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

~\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    514                 raise SSLError(e, request=request)
    515 
--> 516             raise ConnectionError(e, request=request)
    517 
    518         except ClosedPoolError as e:

ConnectionError: HTTPConnectionPool(host='jackmaryetc.com', port=80): Max retries exceeded with url: /Travel/US/Colorado/images/052cat.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001F9B39F7588>: Failed to establish a new connection: [Errno 11002] getaddrinfo failed'))

I tried to work around this by adding a sleep, because I read somewhere that this error can be caused by too many requests being sent in quick succession. The script is supposed to take each URL from a CSV file, download the image, and put it in the folder named in the CSV file. I've pasted the code below:

import csv
import os
import time
from urllib.parse import urlparse

import requests

with open('images.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)  # skip the header row
    RQs = 0
    for row in csv_reader:
        basename = os.path.basename(urlparse(row[0]).path)
        filename = '{}/{}/{}'.format(row[2], row[1], basename)

        # Pause after every 200 requests so they are not sent too quickly.
        if RQs == 200:
            time.sleep(1)
            print("sleeping for 1 second")
            RQs = 0

        response = requests.get(row[0])
        if response.status_code == 200:
            with open(filename, 'wb') as f:
                f.write(response.content)

        RQs += 1

Thanks in advance!

Tags: python, scripting, urllib

Solution


The root cause at the start of the chained traceback is gaierror: [Errno 11002] getaddrinfo failed, which means DNS resolution for the host failed: the request never reached the server, so sleeping between requests cannot prevent it. The fix is to catch the exception and skip the unreachable URL. Note that the exception raised here is requests.exceptions.ConnectionError, which is not the builtin ConnectionError, so it must be caught by its full name. I'd like to post a working version of the code:

import csv
import os
import time
from urllib.parse import urlparse

import requests

with open('images.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)  # skip the header row
    RQs = 0
    for row in csv_reader:
        basename = os.path.basename(urlparse(row[0]).path)
        filename = '{}/{}/{}'.format(row[2], row[1], basename)

        # Pause after every 200 requests so they are not sent too quickly.
        if RQs == 200:
            time.sleep(1)
            print("sleeping for 1 second")
            RQs = 0

        try:
            response = requests.get(row[0])
            if response.status_code == 200:
                print(row[0])
                with open(filename, 'wb') as f:
                    f.write(response.content)
        # requests raises requests.exceptions.ConnectionError, which is NOT a
        # subclass of the builtin ConnectionError, so catch it by full name.
        except requests.exceptions.ConnectionError as e:
            print(e)

        RQs += 1
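
For a more robust variant, the retrying and back-off can be delegated to urllib3's Retry machinery instead of a hand-rolled request counter. The sketch below is illustrative rather than part of the original answer: it assumes the same images.csv layout (URL in column 0, subfolder in column 1, parent folder in column 2), and the retry count, back-off factor, and 10-second timeout are arbitrary choices. It also creates the target folder if it is missing, which the original code assumes already exists.

import csv
import os
from urllib.parse import urlparse

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# A single Session reuses TCP connections and carries the retry policy.
session = requests.Session()
retry = Retry(
    total=3,                                     # up to 3 retries per request
    backoff_factor=1,                            # exponential back-off between attempts
    status_forcelist=[429, 500, 502, 503, 504],  # also retry these HTTP statuses
)
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))

with open('images.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)  # skip the header row
    for row in csv_reader:
        basename = os.path.basename(urlparse(row[0]).path)
        folder = os.path.join(row[2], row[1])
        os.makedirs(folder, exist_ok=True)  # create the target folder if needed

        try:
            # The timeout keeps one dead host from stalling the whole run.
            response = session.get(row[0], timeout=10)
        except requests.exceptions.RequestException as e:
            print('Skipping {}: {}'.format(row[0], e))
            continue

        if response.status_code == 200:
            with open(os.path.join(folder, basename), 'wb') as f:
                f.write(response.content)

When the retries are exhausted, urllib3's MaxRetryError still surfaces through requests as a ConnectionError (or RetryError), both subclasses of RequestException, so the try/except remains necessary.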
