首页 > 解决方案 > 使用 Selenium 抓取 Instagram

问题描述

我正在尝试运行下面的脚本。第一个问题是：浏览器打开后，除了其他操作之外，我还需要点击登录按钮。第二个问题是：目标页面打开后，我无法点击“关注者”链接。这里应该怎么处理？我需要自动向下滚动关注者列表，并且只获取关注者的名称。我哪里做错了？

import scrapy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
import time
from scrapy_selenium import SeleniumRequest
from scrapy import selector
from scrapy.selector import Selector
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys



class FollowersSpider(scrapy.Spider):
    """Spider that logs in to Instagram through Selenium and opens a followers list.

    Constructing the spider drives a standalone Chrome session: it loads the
    target profile URL, fills in the login form, dismisses two "Not Now"
    prompts, clicks the profile's followers link, scrolls the window and then
    shuts the browser down. ``start_requests`` separately schedules the same
    profile URL through ``SeleniumRequest``; ``parse`` is still a stub.
    """

    name = 'followers'

    def start_requests(self):
        """Yield a single ``SeleniumRequest`` for the target profile page."""
        yield SeleniumRequest(
            url='http://www.instagram.com/vintage.fashion_styleofny/',
            wait_time=10,
            callback=self.parse
        )

    def __init__(self, *args, **kwargs):
        """Run the Selenium login/navigation sequence when the spider is built.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Keyword arguments forwarded to ``scrapy.Spider.__init__``.

        Raises:
            selenium.common.exceptions.TimeoutException: If any awaited element
                does not become clickable within 10 seconds.
        """
        # Let scrapy.Spider finish its own initialisation and accept spider
        # arguments instead of rejecting them.
        super().__init__(*args, **kwargs)

        chrome_options = Options()
        # Chrome command-line switches start with two dashes; '__headless' is
        # not a recognised switch, so the browser would open a visible window.
        chrome_options.add_argument('--headless')

        # Resolve chromedriver from PATH.
        chrome_path = which('chromedriver')

        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
        # of a Service object; kept because the file targets this call style.
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
        try:
            driver.set_window_size(1920, 1080)

            # Open the profile page.
            driver.get('http://www.instagram.com/vintage.fashion_styleofny/')

            wait = WebDriverWait(driver, 10)

            # Locate the login form fields.
            username = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "input[name='username']")))
            password = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "input[name='password']")))

            # Enter the credentials (placeholder values).
            username.clear()
            username.send_keys("username")
            password.clear()
            password.send_keys("password")

            # Submit the login form. click() returns None, so there is nothing
            # worth binding to a name here.
            wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "button[type='submit']"))).click()

            # Fixed pause before looking for the post-login prompts.
            time.sleep(5)

            # Dismiss the two consecutive "Not Now" prompts (the save-login
            # prompt, then the following pop-up); both use the same locator.
            for _ in range(2):
                wait.until(EC.element_to_be_clickable(
                    (By.XPATH, "//button[contains(text(), 'Not Now')]"))).click()

            # Click the followers link. The locator constant is By.XPATH;
            # `By.xpath` does not exist and raises AttributeError.
            click_follower = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//a[@href='/vintage.fashion_styleofny/followers/']")))
            click_follower.click()

            # NOTE(review): this scrolls the top-level window, not the
            # followers dialog, and no follower names are collected yet.
            driver.execute_script("window.scrollTo(0, 4000);")
        finally:
            # Always end the browser session, even when a wait times out, so no
            # Chrome/chromedriver process is left behind.
            driver.quit()

    def parse(self, response):
        """Handle the profile page response; not implemented yet."""
        pass

标签: selenium, selenium-webdriver, scrapy, selenium-chromedriver

解决方案


推荐阅读