首页 > 解决方案 > Instagram 抓取登录

问题描述

我正在从多个 Instagram 帖子中抓取评论,代码如下。起初我不需要登录帐户,因为帖子页面可以直接打开,只需关闭一个弹出窗口就可以开始抓取。但运行几次之后,Selenium 打开的浏览器开始要求我登录,所以我不得不在下面的代码中加入登录操作。现在我已经能用下面的代码成功登录,但登录后打开的不是我要抓取的帖子,而是我的时间线。登录后如何打开要抓取的帖子?

from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains



# Instagram posts whose comments are scraped.  Keep one URL per line with a
# trailing comma: Python silently concatenates adjacent string literals, which
# previously fused the last two URLs into a single invalid address.
url = [
    'https://www.instagram.com/p/CRLe53_hmMH',
    'https://www.instagram.com/p/CRX7VL1sL54/?utm_medium=share_sheet',
    'https://www.instagram.com/p/CRVB7ykM7-R/?utm_medium=share_sheet',
    'https://www.instagram.com/p/CRQ9Bq5M6ce/?utm_medium=share_sheet',
    'https://www.instagram.com/p/CRQT1BJMmSi/?utm_medium=share_sheet',
    'https://www.instagram.com/p/CM8T3HgMQG0/?utm_medium=copy_link',
    'https://www.instagram.com/p/COrn5fYs78O/?utm_medium=share_sheet',
]
user_names = []
user_comments = []

# Placeholder credentials; replace them before running the script.
USERNAME = 'my_username'
PASSWORD = 'my_password'
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object -- confirm
# against the installed version.
CHROMEDRIVER_PATH = 'E:/chromedriver'
LOAD_MORE_SELECTOR = '.MGdpg > button:nth-child(1)'
OUTPUT_FILE = 'ujicoba_gabung_IG_4.xlsx'


def _login_if_prompted(driver, post_url, timeout=10):
    """Log in if the login form is shown, then reopen *post_url*.

    Args:
        driver: The Selenium WebDriver currently showing ``post_url``.
        post_url: Address of the post to return to after logging in.
        timeout: Seconds to wait for the login form and follow-up dialogs.

    Returns:
        True when a login was performed, False when no login form appeared
        within ``timeout`` seconds.
    """
    from selenium.common.exceptions import TimeoutException
    from selenium.common.exceptions import WebDriverException

    wait = WebDriverWait(driver, timeout)
    try:
        username = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input[name='username']")))
        password = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input[name='password']")))
    except TimeoutException:
        # No login form: the post is assumed to be directly accessible.
        return False

    username.clear()
    username.send_keys(USERNAME)
    password.clear()
    password.send_keys(PASSWORD)
    # click() returns None, so its result is deliberately not stored.
    WebDriverWait(driver, 2).until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "button[type='submit']"))).click()

    time.sleep(4)
    # Up to two "Not Now" dialogs may follow a login.  Dismissing them is
    # best effort because the post is reloaded right afterwards anyway.
    for _ in range(2):
        try:
            wait.until(EC.element_to_be_clickable(
                (By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        except WebDriverException:
            break

    # Logging in lands on the timeline, so navigate back to the post.
    driver.get(post_url)
    time.sleep(3)
    return True


def _expand_comments(driver, max_clicks=10):
    """Click the "load more comments" button until it is gone.

    Stops after ``max_clicks`` clicks, when the button is missing or hidden,
    or when a click fails.
    """
    from selenium.common.exceptions import NoSuchElementException
    from selenium.common.exceptions import WebDriverException

    for _ in range(max_clicks):
        try:
            # Look the button up again every round: the page re-renders it.
            button = driver.find_element(By.CSS_SELECTOR, LOAD_MORE_SELECTOR)
            print("Found {}".format(str(button)))
            if not button.is_displayed():
                break
            button.click()
        except NoSuchElementException:
            break
        except WebDriverException as exc:
            print(exc)
            break
        time.sleep(1.5)


def _collect_comments(driver):
    """Return ``(name, comment)`` pairs for the post currently displayed.

    The first matching element is skipped, as the original script dropped it
    (presumably it is the post's own caption -- TODO confirm).  Elements
    lacking the expected children are skipped instead of aborting the post.
    """
    from selenium.common.exceptions import NoSuchElementException

    rows = []
    for item in driver.find_elements(By.CLASS_NAME, 'gElp9')[1:]:
        try:
            container = item.find_element(By.CLASS_NAME, 'C4VMK')
            name = container.find_element(By.CLASS_NAME, '_6lAjh').text
            content = container.find_element(By.TAG_NAME, 'span').text
        except NoSuchElementException:
            continue
        rows.append((name, content.replace('\n', ' ').strip()))
    return rows


def main():
    """Scrape every post in ``url`` and export the comments to Excel.

    One browser session is shared by all posts, so a login performed for one
    post carries over to the following ones.  A post that fails with a
    WebDriver error is reported and skipped so that the comments already
    collected are still exported.

    Returns:
        The DataFrame that was written to ``OUTPUT_FILE``.
    """
    from selenium.common.exceptions import WebDriverException

    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        for post_url in url:
            try:
                driver.get(post_url)
                time.sleep(3)  # give the page time to render
                _login_if_prompted(driver, post_url)
                _expand_comments(driver)
                rows = _collect_comments(driver)
            except WebDriverException as exc:
                print(exc)
                continue
            for name, content in rows:
                user_names.append(name)
                user_comments.append(content)
    finally:
        # quit() also ends the chromedriver process; close() only closes
        # the current window.
        driver.quit()

    frame = pd.DataFrame(list(zip(user_names, user_comments)),
                         columns=['Name', 'Comments'])
    frame.to_excel(OUTPUT_FILE)
    print(frame)
    return frame


if __name__ == "__main__":
    df = main()

标签: python, selenium, selenium-webdriver

解决方案


推荐阅读