작성
·
710
1
로그인 > 해시태그 검색 까지는 작동되는데... 이후 스크롤부터 링크 추출까지 막혔습니다. 어떤 문제가 있는지 알수 있을까요?
---------------------------------------------------------
import time
import chromedriver_autoinstaller
chromedriver_autoinstaller.install()
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
driver = webdriver.Chrome()
driver.get("https://www.instagram.com/")
# time.sleep(2)
id_selector = "#loginForm > div > div:nth-child(1) > div > label > input"
WebDriverWait(driver, 10).until(EC.presence_of_element_located(
(By.CSS_SELECTOR, id_selector)
))
import mdata
id_input = driver.find_element(By.CSS_SELECTOR,id_selector)
id_input.send_keys(mdata.id)
time.sleep(1)
pw_selector = "#loginForm > div > div:nth-child(2) > div > label > input"
pw_input = driver.find_element(By.CSS_SELECTOR,pw_selector)
pw_input.send_keys(mdata.pw)
time.sleep(1)
login_btn_selector = "#loginForm > div > div:nth-child(3) > button"
login_btn = driver.find_element(By.CSS_SELECTOR, login_btn_selector)
login_btn.click()
time.sleep(10)
from urllib import parse
keyword = "사업가"
keyword = parse.quote(keyword)
driver.get(f"https://www.instagram.com/explore/tags/{keyword}/")
time.sleep(10)
all_posting_sel = "div[id^='mount_0_0'} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"
time.sleep(3)
all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)
time.sleep(3)
'''링크 100개 추출'''
links = []
while len(links) < 100 :
for _ in range(6):
driver.execute_script("window.scrollBy(0.600);")
time.sleep(1)
all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)
posk_links = all_posting_box.find_elements(By.TAG_NAME,"a")
for eachLink in posk_links:
link = eachLink.get_attribute('href')
links.append(link)
links = set(links)
links = list(links)
for link in links:
print(link)
print("******")
print(len(links), "개의 링크를 추출")
input()
------------------------------------------------------
DevTools listening on ws://127.0.0.1:50287/devtools/browser/ed0f17f2-033d-4ba2-80b8-8f3d2f886171
Traceback (most recent call last):
File "c:\Users\문소희\Desktop\project\insta_auto\insta_web.py", line 50, in <module>
all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 831, in find_element
return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 440, in execute
self.error_handler.check_response(response)
File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 245, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
(Session info: chrome=113.0.5672.127)
Stacktrace:
Backtrace:
GetHandleVerifier [0x005C6DF3+48691]
(No symbol) [0x00558CC1]
(No symbol) [0x00465068]
(No symbol) [0x00468401]
(No symbol) [0x00469641]
(No symbol) [0x004696E0]
(No symbol) [0x004900D0]
(No symbol) [0x004906AB]
(No symbol) [0x004BDD62]
(No symbol) [0x004AA314]
(No symbol) [0x004BC452]
(No symbol) [0x004AA0C6]
(No symbol) [0x00487E18]
(No symbol) [0x00488F3D]
GetHandleVerifier [0x00824EAA+2531050]
GetHandleVerifier [0x00864B60+2792352]
GetHandleVerifier [0x0085E6EC+2766636]
GetHandleVerifier [0x00650820+612448]
(No symbol) [0x005625BC]
(No symbol) [0x0055E808]
(No symbol) [0x0055E8EB]
(No symbol) [0x00551C77]
BaseThreadInitThunk [0x754900C9+25]
RtlGetAppContainerNamedObjectPath [0x772E7B4E+286]
RtlGetAppContainerNamedObjectPath [0x772E7B1E+238]
답변 1
2
작성해주신 코드 중 일부입니다
all_posting_sel = "div[id^='mount_0_0'} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"
앞 부분에 div[id^='mount_0_0'} 라고 되어있는데 -> div[id^='mount_0_0'] 로 변경해주셔야합니다.
all_posting_sel = "div[id^='mount_0_0'] > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"
# 이렇게 사용해보시지요 !