• 카테고리

    질문 & 답변
  • 세부 분야

    업무 자동화

  • 해결 여부

    미해결

해시태그 추출

23.05.25 18:21 작성 조회수 536

1

로그인 > 해시태그 검색 까지는 작동되는데... 이후 스크롤부터 링크 추출까지 막혔습니다. 어떤 문제가 있는지 알수 있을까요?

---------------------------------------------------------

import time

import chromedriver_autoinstaller

chromedriver_autoinstaller.install()

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.common.keys import Keys

from selenium.webdriver import ActionChains

driver = webdriver.Chrome()

driver.get("https://www.instagram.com/")

# time.sleep(2)

id_selector = "#loginForm > div > div:nth-child(1) > div > label > input"

WebDriverWait(driver, 10).until(EC.presence_of_element_located(

(By.CSS_SELECTOR, id_selector)

))

import mdata

id_input = driver.find_element(By.CSS_SELECTOR,id_selector)

id_input.send_keys(mdata.id)

time.sleep(1)

pw_selector = "#loginForm > div > div:nth-child(2) > div > label > input"

pw_input = driver.find_element(By.CSS_SELECTOR,pw_selector)

pw_input.send_keys(mdata.pw)

time.sleep(1)

login_btn_selector = "#loginForm > div > div:nth-child(3) > button"

login_btn = driver.find_element(By.CSS_SELECTOR, login_btn_selector)

login_btn.click()

time.sleep(10)

from urllib import parse

keyword = "사업가"

keyword = parse.quote(keyword)

driver.get(f"https://www.instagram.com/explore/tags/{keyword}/")

time.sleep(10)

all_posting_sel = "div[id^='mount_0_0'} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"

time.sleep(3)

all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)

time.sleep(3)

'''링크 100개 추출'''

links = []

while len(links) < 100 :

for _ in range(6):

driver.execute_script("window.scrollBy(0.600);")

time.sleep(1)

all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)

posk_links = all_posting_box.find_elements(By.TAG_NAME,"a")

for eachLink in posk_links:

link = eachLink.get_attribute('href')

links.append(link)

links = set(links)

links = list(links)

for link in links:

print(link)

print("******")

print(len(links), "개의 링크를 추출")

input()

------------------------------------------------------
DevTools listening on ws://127.0.0.1:50287/devtools/browser/ed0f17f2-033d-4ba2-80b8-8f3d2f886171

Traceback (most recent call last):

File "c:\Users\문소희\Desktop\project\insta_auto\insta_web.py", line 50, in <module>

all_posting_box = driver.find_element(By.CSS_SELECTOR, all_posting_sel)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 831, in find_element

return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 440, in execute

self.error_handler.check_response(response)

File "C:\Users\문소희\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 245, in check_response

raise exception_class(message, screen, stacktrace)

selenium.common.exceptions.InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified

(Session info: chrome=113.0.5672.127)

Stacktrace:

Backtrace:

GetHandleVerifier [0x005C6DF3+48691]

(No symbol) [0x00558CC1]

(No symbol) [0x00465068]

(No symbol) [0x00468401]

(No symbol) [0x00469641]

(No symbol) [0x004696E0]

(No symbol) [0x004900D0]

(No symbol) [0x004906AB]

(No symbol) [0x004BDD62]

(No symbol) [0x004AA314]

(No symbol) [0x004BC452]

(No symbol) [0x004AA0C6]

(No symbol) [0x00487E18]

(No symbol) [0x00488F3D]

GetHandleVerifier [0x00824EAA+2531050]

GetHandleVerifier [0x00864B60+2792352]

GetHandleVerifier [0x0085E6EC+2766636]

GetHandleVerifier [0x00650820+612448]

(No symbol) [0x005625BC]

(No symbol) [0x0055E808]

(No symbol) [0x0055E8EB]

(No symbol) [0x00551C77]

BaseThreadInitThunk [0x754900C9+25]

RtlGetAppContainerNamedObjectPath [0x772E7B4E+286]

RtlGetAppContainerNamedObjectPath [0x772E7B1E+238]

답변 1

답변을 작성해보세요.

2

작성해주신 코드 중 일부입니다

all_posting_sel = "div[id^='mount_0_0'} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"

 

앞 부분에 div[id^='mount_0_0'} 라고 되어있는데 -> div[id^='mount_0_0'] 로 변경해주셔야합니다.

all_posting_sel = "div[id^='mount_0_0'] > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > article > div:nth-child(3) > div"

# 이렇게 사용해보시지요 !