selenium - 使用 Selenium 抓取 Instagram
问题描述
我正在尝试运行下面的脚本。第一个问题是:浏览器打开后,其他步骤都能自动完成,唯独登录按钮仍需要我手动点击。第二个问题是:目标页面打开后,我无法点击关注者(followers)链接。我该怎么做?我需要在关注者列表中自动向下滚动,并且只获取关注者的名称。我哪里做错了?
import scrapy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.support.wait import WebDriverWait
import time
from scrapy_selenium import SeleniumRequest
from scrapy import selector
from scrapy.selector import Selector
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
class FollowersSpider(scrapy.Spider):
    """Spider that drives Chrome through an Instagram login and opens a followers link.

    All browser automation happens in ``__init__`` with a private Selenium
    driver that is shut down before ``__init__`` returns; ``start_requests``
    separately schedules one ``SeleniumRequest`` for the same profile URL and
    ``parse`` is still a stub.
    """

    name = 'followers'

    def start_requests(self):
        """Yield a single ``SeleniumRequest`` for the target profile page."""
        yield SeleniumRequest(
            url='http://www.instagram.com/vintage.fashion_styleofny/',
            wait_time=10,
            callback=self.parse
        )

    def __init__(self, *args, **kwargs):
        """Log in with Selenium, dismiss the pop-ups and click the followers link.

        Positional and keyword arguments are forwarded unchanged to
        ``scrapy.Spider.__init__`` so the spider can still be constructed
        with no arguments, as before.

        Raises:
            selenium.common.exceptions.TimeoutException: if a required
                element (login inputs, submit button, followers link) does
                not become clickable within 10 seconds.
        """
        # The original skipped the base initialiser entirely.
        super().__init__(*args, **kwargs)

        chrome_options = Options()
        # Chrome switches start with two dashes; '__headless' was passed
        # through as an unrecognised argument and headless mode never applied.
        chrome_options.add_argument('--headless')
        # Locate chromedriver on PATH (download it and make it reachable first).
        chrome_path = which('chromedriver')
        # NOTE(review): `executable_path` is kept for compatibility with the
        # Selenium version this script was written against; newer releases
        # expect a Service object instead -- confirm the installed version.
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
        try:
            driver.set_window_size(1920, 1080)
            # Open the target profile page.
            driver.get('http://www.instagram.com/vintage.fashion_styleofny/')

            # Locate the login form inputs.
            username = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
            password = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))

            # Enter the (placeholder) credentials.
            username.clear()
            username.send_keys("username")
            password.clear()
            password.send_keys("password")

            # Click the login button. `.click()` returns None, so the result
            # is deliberately not bound to a name.
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()

            # Give the post-login page time to load before looking for pop-ups.
            time.sleep(5)

            # Two consecutive "Not Now" prompts are expected after login
            # (the original comments call them "saved_login" and "pop up").
            self._dismiss_not_now(driver)
            self._dismiss_not_now(driver)

            # Click the followers link. `By.XPATH` is the real constant;
            # `By.xpath` raised AttributeError before the wait even started.
            click_follower = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//a[@href='/vintage.fashion_styleofny/followers/']")))
            click_follower.click()

            # NOTE(review): this scrolls the window, not the followers list
            # itself; loading more followers presumably requires scrolling
            # the list's own container -- confirm against the live page.
            driver.execute_script("window.scrollTo(0, 4000);")
        finally:
            # Always end the browser session, including when a wait times
            # out; the original only closed the window on the success path.
            driver.quit()

    @staticmethod
    def _dismiss_not_now(driver, timeout=10):
        """Click a "Not Now" button if one becomes clickable within ``timeout`` seconds.

        Returns:
            True if a button was clicked, False if none appeared in time.
        """
        # Imported locally so the file's top-level import block is untouched.
        from selenium.common.exceptions import TimeoutException

        try:
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//button[contains(text(), 'Not Now')]"))).click()
        except TimeoutException:
            # Treated as optional: a missing prompt should not abort the run.
            return False
        return True

    def parse(self, response):
        """Callback for the ``SeleniumRequest``; not implemented yet."""
        pass
解决方案
推荐阅读
- java - 使用 JUnit 和 Mockito 对 DAO 类进行单元测试
- reactjs - ReactJS:从 api print 获取不仅仅是一次
- java - 如何等到 JavaRx2 Flowable 完成所有任务?
- c# - 使用 EF Core 枚举属性进行映射
- docker - 无法登录弹性搜索
- angular - Angular 7 Universal 的元标记未及时更新以供爬虫使用
- html - 如何将元素固定在水平可滚动的 div 中
- c# - 如何创建 SHA-2 和 AES 加密字符串以在 .NET 的 HTTP 请求标头中使用?
- python - 使用 scipy.signal 方法强制数据类型
- ruby-on-rails - 模型中两个日期之间的 Rails 范围