首页 > 解决方案 > 在 Selenium python 中抓取关注者列表

问题描述

import time
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
# Script from the question: log in to Instagram with Chrome, open the
# "Profile" page, open the followers dialog and press PAGE_DOWN until at
# least 290 <li> entries are present in the dialog's list.

# Location of the local ChromeDriver executable.
PATH = r"C:\Users\welcome\chromedriver.exe"
driver = webdriver.Chrome(PATH)

driver.get("https://www.instagram.com/")
# Wait up to 10 s for the username field to be clickable, then type into it.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='username']"))).send_keys("username") #replace with your username
driver.find_element_by_xpath("//input[@name='password']").send_keys("some_password") #replace with your password

# Submit the login form.
driver.find_element_by_xpath("//button/div[text()='Log In']").click()

#driver.find_element_by_class_name("_2dbep qNELH").click()

# Wait up to 10 s for the element with class "qNELH" and click it.
# presumably this is the avatar that opens the account menu — TODO confirm
element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "qNELH"))
            )
element.click()

driver.find_element_by_xpath("//div[text()='Profile']").click()
# NOTE(review): implicitly_wait() configures the element-lookup timeout for
# later find_element calls; it does not pause the script at this point.
driver.implicitly_wait(5)
driver.find_element_by_xpath("//a[text() = ' followers']").click()
driver.implicitly_wait(3)


# <ul> inside the followers dialog; each loaded follower is counted as one <li>.
FList = driver.find_element_by_css_selector('div[role=\'dialog\'] ul')
numberOfFollowersInList = len(FList.find_elements_by_css_selector('li'))
# Click the list once before scrolling.
FList.click()
divL=driver.find_element_by_class_name("eiUFA")
actionChain = ActionChains(driver)
# Random 2-4 s pause before the scrolling loop starts.
time.sleep(random.randint(2,4))

# Keep paging down until at least 290 <li> entries have been loaded.
while (numberOfFollowersInList < 290):      
        # NOTE(review): when `element` is passed, ActionChains.key_down/key_up
        # click that element before sending the key. This is a likely cause of
        # the follower link being opened — confirm against the Selenium docs.
        actionChain.key_down(Keys.PAGE_DOWN,element=FList).key_up(Keys.PAGE_DOWN,element=FList).perform()        
        numberOfFollowersInList = len(FList.find_elements_by_css_selector('li'))
        time.sleep(0.4)
        print(numberOfFollowersInList)
        # Second PAGE_DOWN, sent to whichever element currently has focus.
        actionChain.key_down(Keys.PAGE_DOWN).key_up(Keys.PAGE_DOWN).perform()
        # NOTE(review): this is another real mouse click inside the dialog on
        # every iteration; it may also land on a follower link.
        divL.click()
        time.sleep(2)

执行此脚本时,页面会滚动几次,但随后会点击其中一个关注者的链接并跳转到其个人主页。请问应如何修改代码,才能避免点击这些链接?

标签: python selenium selenium-webdriver web-scraping

解决方案


import random
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains

# Log in to Instagram with Firefox, open the "Profile" page, open the
# followers dialog and scroll it until enough follower entries are loaded.
#
# Scrolling sends PAGE_DOWN straight to the scrollable container with
# WebElement.send_keys(). No ActionChains call with `element=` and no
# click on the list is used, because both perform a real mouse click that
# can land on a follower link and navigate away from the dialog.

TARGET_FOLLOWER_COUNT = 290  # stop once this many <li> entries are loaded
MAX_STALLED_ROUNDS = 5       # give up after this many scrolls that load nothing new

driver = webdriver.Firefox()
# One explicit wait (10 s timeout) reused for every element that appears
# asynchronously, instead of mixing in implicitly_wait().
wait = WebDriverWait(driver, 10)

driver.get("https://www.instagram.com/")
wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//input[@name='username']"))).send_keys("email")  # replace with your username
# replace with your password
driver.find_element(By.XPATH, "//input[@name='password']").send_keys("pasword")

driver.find_element(By.XPATH, "//button/div[text()='Log In']").click()

# presumably the avatar that opens the account menu; the selector relies on
# Instagram's generated class names — TODO confirm it still matches
element = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "span._2dbep.qNELH"))
)
element.click()

wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//div[text()='Profile']"))).click()
wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//a[text() = ' followers']"))).click()

# <ul> inside the followers dialog; each loaded follower is one <li>.
FList = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='dialog'] ul"))
)
# Scrollable container of the dialog; this is what receives PAGE_DOWN.
frame = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "[class='isgrP']"))
)
numberOfFollowersInList = len(FList.find_elements(By.CSS_SELECTOR, "li"))
time.sleep(random.randint(2, 4))

stalled_rounds = 0
while (numberOfFollowersInList < TARGET_FOLLOWER_COUNT
       and stalled_rounds < MAX_STALLED_ROUNDS):
    frame.send_keys(Keys.PAGE_DOWN)
    # Give the page time to fetch and render the next batch of followers.
    time.sleep(2)
    loaded = len(FList.find_elements(By.CSS_SELECTOR, "li"))
    # Count consecutive scrolls without new entries so the loop also ends
    # for accounts that have fewer followers than the target.
    stalled_rounds = stalled_rounds + 1 if loaded == numberOfFollowersInList else 0
    numberOfFollowersInList = loaded
    print(numberOfFollowersInList)

您可以把 PAGE_DOWN 按键直接发送给 CSS 选择器 "[class='isgrP']" 匹配到的元素(即我代码中名为 frame 的变量)。

我不确定为什么 ActionChains 会点击元素;一个可能的原因是:调用 key_down / key_up 时如果传入了 element 参数,Selenium 会先点击该元素再发送按键。


推荐阅读