• 카테고리

    질문 & 답변
  • 세부 분야

    데이터 분석

  • 해결 여부

    미해결

beautiful soup 명령 실행 오류 관련 문의

23.10.10 16:28 작성 조회수 393

0

안녕하세요

파이썬 입문과 크롤링 기초 부트캠프 수강 중

beautiful soup 명령 (크롤링) 실행 오류 발생하여 문의드립니다.

 

beautiful soup 명령 실행 시 아래와 같은 오류가 발생하네요.

(아나콘다 및 bs 재설치 해도 동일 문제 발생)

 

개인 노트북으로 학습할 때는 잘되는데 회사 PC로 하니 이런 오류가 발생하네요..

사정상 회사 PC로도 학습을 해야 하는데 난감하네요. 도움 부탁드립니다.

 


[명령문]


import requests

from bs4 import BeautifulSoup

res = requests.get('https://davelee-fun.github.io/blog/crawl_test_css.html')

soup = BeautifulSoup(res.content, 'html.parser')


[오류내용]


SSLCertVerificationError                  Traceback (most recent call last)
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:714, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    713 # Make the request on the httplib connection object.
--> 714 httplib_response = self._make_request(
    715     conn,
    716     method,
    717     url,
    718     timeout=timeout_obj,
    719     body=body,
    720     headers=headers,
    721     chunked=chunked,
    722 )
    724 # If we're going to release the connection in ``finally:``, then
    725 # the response doesn't need to know about the connection. Otherwise
    726 # it will also try to release it and we'll have a double-release
    727 # mess.

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:403, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    402 try:
--> 403     self._validate_conn(conn)
    404 except (SocketTimeout, BaseSSLError) as e:
    405     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:1053, in HTTPSConnectionPool._validate_conn(self, conn)
   1052 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
-> 1053     conn.connect()
   1055 if not conn.is_verified:

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connection.py:419, in HTTPSConnection.connect(self)
    417     context.load_default_certs()
--> 419 self.sock = ssl_wrap_socket(
    420     sock=conn,
    421     keyfile=self.key_file,
    422     certfile=self.cert_file,
    423     key_password=self.key_password,
    424     ca_certs=self.ca_certs,
    425     ca_cert_dir=self.ca_cert_dir,
    426     ca_cert_data=self.ca_cert_data,
    427     server_hostname=server_hostname,
    428     ssl_context=context,
    429     tls_in_tls=tls_in_tls,
    430 )
    432 # If we're using all defaults and the connection
    433 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
    434 # for the host.

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\ssl_.py:449, in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
    448 if send_sni:
--> 449     ssl_sock = _ssl_wrap_socket_impl(
    450         sock, context, tls_in_tls, server_hostname=server_hostname
    451     )
    452 else:

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\ssl_.py:493, in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
    492 if server_hostname:
--> 493     return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
    494 else:

File ~\AppData\Local\anaconda3\Lib\ssl.py:517, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
    511 def wrap_socket(self, sock, server_side=False,
    512                 do_handshake_on_connect=True,
    513                 suppress_ragged_eofs=True,
    514                 server_hostname=None, session=None):
    515     # SSLSocket class handles server_hostname encoding before it calls
    516     # ctx._wrap_socket()
--> 517     return self.sslsocket_class._create(
    518         sock=sock,
    519         server_side=server_side,
    520         do_handshake_on_connect=do_handshake_on_connect,
    521         suppress_ragged_eofs=suppress_ragged_eofs,
    522         server_hostname=server_hostname,
    523         context=self,
    524         session=session
    525     )

File ~\AppData\Local\anaconda3\Lib\ssl.py:1108, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
   1107             raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1108         self.do_handshake()
   1109 except (OSError, ValueError):

File ~\AppData\Local\anaconda3\Lib\ssl.py:1379, in SSLSocket.do_handshake(self, block)
   1378         self.settimeout(None)
-> 1379     self._sslobj.do_handshake()
   1380 finally:

SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:798, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    796     e = ProtocolError("Connection aborted.", e)
--> 798 retries = retries.increment(
    799     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    800 )
    801 retries.sleep()

File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    591 if new_retry.is_exhausted():
--> 592     raise MaxRetryError(_pool, url, error or ResponseError(cause))
    594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='davelee-fun.github.io', port=443): Max retries exceeded with url: /blog/crawl_test_css.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))

During handling of the above exception, another exception occurred:

SSLError                                  Traceback (most recent call last)
Cell In[3], line 4
      1 import requests
      2 from bs4 import BeautifulSoup
----> 4 res = requests.get('https://davelee-fun.github.io/blog/crawl_test_css.html')
      5 soup = BeautifulSoup(res.content, 'html.parser')

File ~\AppData\Local\anaconda3\Lib\site-packages\requests\api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64 
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File ~\AppData\Local\anaconda3\Lib\site-packages\requests\api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File ~\AppData\Local\anaconda3\Lib\site-packages\requests\sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File ~\AppData\Local\anaconda3\Lib\site-packages\requests\sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File ~\AppData\Local\anaconda3\Lib\site-packages\requests\adapters.py:517, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    513         raise ProxyError(e, request=request)
    515     if isinstance(e.reason, _SSLError):
    516         # This branch is for urllib3 v1.22 and later.
--> 517         raise SSLError(e, request=request)
    519     raise ConnectionError(e, request=request)
    521 except ClosedPoolError as e:

SSLError: HTTPSConnectionPool(host='davelee-fun.github.io', port=443): Max retries exceeded with url: /blog/crawl_test_css.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))

 

 

답변 1

답변을 작성해보세요.

0

김건호님의 프로필

김건호

질문자

2023.10.10

구글에 에러난 내용 검색해서 알아본 조치사항을 적용해보니 아래와 같이 나오네요

사내 보안 같은것 때문에 안되는건지..

 

import requests

from bs4 import BeautifulSoup

res = requests.get('https://calendar.google.com/calendar/u/0/r/month?pli=1', verify=False)

soup = BeautifulSoup(res.content, 'html.parser')

 


 

C:\Users\6804314\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:1056: InsecureRequestWarning: Unverified HTTPS request is being made to host 'calendar.google.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
  warnings.warn(

 

안녕하세요. 답변 도우미입니다.

답글이 이미 있다고 나와서 문의를 놓쳤습니다. 죄송합니다.

말씀하신 대로, SSL과 관련된 에러로 보이는데요. SSL 관련 에러는 보통 사내 PC를 사용할 때, 사내 PC에 보안 처리가 되어 있어서 발생하는 경우가 많습니다. 이 경우에는 부득이 개인 PC를 사용하셔야 하는데요. 사실 개발용 회사 PC가 아니라면, 개발 관련 작업은 사내 PC에서는 다 막아놓은 회사들이 많아서, 개인 PC로 익혀보시는 것이 장기적으로도 좋을 것 같습니다.

감사합니다.