python - Instagram 抓取登录
问题描述
我正在从多个 Instagram 帖子中抓取评论,代码如下。起初,我不需要登录我的帐户,因为新闻已经打开,我所要做的就是关闭一个弹出窗口然后刮掉。但是运行几次后,selenium 中的浏览器要求我登录,所以我不得不在下面的代码中添加登录操作。我已经使用下面的代码成功登录,但不是打开我要抓取的帖子,而是打开我的时间线。登录后如何打开要抓取的帖子?
from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
url=['https://www.instagram.com/p/CRLe53_hmMH','https://www.instagram.com/p/CRX7VL1sL54/?utm_medium=share_sheet',
'https://www.instagram.com/p/CRVB7ykM7-R/?utm_medium=share_sheet', 'https://www.instagram.com/p/CRQ9Bq5M6ce/?utm_medium=share_sheet',
'https://www.instagram.com/p/CRQT1BJMmSi/?utm_medium=share_sheet', 'https://www.instagram.com/p/CM8T3HgMQG0/?utm_medium=copy_link'
'https://www.instagram.com/p/COrn5fYs78O/?utm_medium=share_sheet']
user_names = []
user_comments = []
for n in url:
driver = driver = webdriver.Chrome('E:/chromedriver')
driver.get(n)
wait = WebDriverWait(driver, 10)
time.sleep(3)
#if user not logined
try:
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
username.clear()
username.send_keys('my_username')
password.clear()
password.send_keys('my_password')
Login_button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
time.sleep(4)
not_now = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
not_now2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
not_now.click()
not_now2.click()
try:
load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
print("Found {}".format(str(load_more_comment)))
i = 0
while load_more_comment.is_displayed() and i < 10:
load_more_comment.click()
time.sleep(1.5)
load_more_comment = driver.find_element_by_css_selector('.MGdpg > button:nth-child(1)')
print("Found {}".format(str(load_more_comment)))
i += 1
except Exception as e:
print(e)
pass
except:
pass
comment = driver.find_elements_by_class_name('gElp9 ')
for c in comment:
container = c.find_element_by_class_name('C4VMK')
name = container.find_element_by_class_name('_6lAjh').text
content = container.find_element_by_tag_name('span').text
content = content.replace('\n', ' ').strip().rstrip()
user_names.append(name)
user_comments.append(content)
#print(content)
user_names.pop(0)
user_comments.pop(0)
#export(user_names, user_comments)
driver.close()
df = pd.DataFrame(list(zip(user_names, user_comments)),
columns =['Name', 'Comments'])
df.to_excel('ujicoba_gabung_IG_4.xlsx')
print(df)
解决方案
推荐阅读
- swift - :-1: 链接器命令失败,退出代码为 1(使用 -v 查看调用)
- apache-nifi - NiFi Registry 0.8.0 无法加载缓存项错误
- javascript - 单击后将数据注入文件输入更改事件
- swiftui - SwiftUI:使用matchedGeometryEffect控制视图上的zIndex
- android - 分支 IO 深层链接始终打开启动活动
- angularjs - 无法将值传递到 uibModal
- maven - Apache Atlas: curl: (7) 无法连接到 localhost 端口 21000: Connection refused
- python - 如何正确结束分配给 Telegram Telebot 的线程?
- c# - 使用 WaitForSelector 处于无头模式的 Puppeteer 有时会超时
- python - django 中的最佳批量大小是多少