首页 > 解决方案 > 尝试使用链接从 .csv 中抓取 Instagram 帖子数据 - 硕士论文

问题描述

我正在尝试从 .csv 中的链接集合中抓取 instagram 帖子数据(点赞数、标题、标签、提及和评论数),以进行数据分析,用于我的硕士论文。但是,我遇到了一个无法找到 xpath 或 element 的错误。这是错误消息:

selenium.common.exceptions.NoSuchElementException:消息:没有这样的元素:无法找到元素:{"method":"xpath","selector":"//*[@id="react-root"]/section/main /div/div/article/div[2]/section[2]/div/div/button"}

这是我使用 selenium 编写的代码块:

def scrape_post_data():
    """Log in to Instagram with Selenium and scrape post data for every
    link listed in influencerpostlinks1.csv (likes/views, post age,
    caption, hashtags, mentions), writing the rows to
    influencerpostsdata.csv.

    Uses module-level names defined elsewhere in this file:
    ``enter_loginusername_entry`` / ``enter_password_entry`` (GUI entry
    widgets holding the credentials) and ``find_hashtags`` /
    ``find_mentions`` (caption parsers) -- TODO confirm against the
    surrounding script.
    """
    influencerpostsdata = []

    # Start Chrome via the local chromedriver binary.
    chromedriver_path = r"C:\\Users\\stuar\\Instagram Scraper\\ChromeDrivers\chromedriver.exe"
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    time.sleep(2)

    # Open the Instagram landing page.
    url = "https://www.instagram.com"
    driver.get(url)
    time.sleep(3)

    # Alert number 1: dismiss the cookie-consent dialog.
    time.sleep(5)
    WebDriverWait(driver, 15).until(EC.element_to_be_clickable(
        (By.XPATH, '//button[contains(text(), "Accept All")]'))).click()

    # Wait until the login form fields are interactable.
    username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "input[name='username']")))
    password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "input[name='password']")))

    # Enter the credentials taken from the GUI entry widgets and submit.
    login_username = str(enter_loginusername_entry.get())
    login_password = str(enter_password_entry.get())
    username.clear()
    username.send_keys(login_username)
    password.clear()
    password.send_keys(login_password)
    WebDriverWait(driver, 2).until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "button[type='submit']"))).click()

    # Alert number 2: dismiss the first "Not Now" prompt after login.
    time.sleep(5)
    WebDriverWait(driver, 15).until(EC.element_to_be_clickable(
        (By.XPATH, '//button[contains(text(), "Not Now")]'))).click()

    # Alert number 3: dismiss the second "Not Now" prompt.
    time.sleep(5)
    WebDriverWait(driver, 15).until(EC.element_to_be_clickable(
        (By.XPATH, '//button[contains(text(), "Not Now")]'))).click()

    # Visit every post link (column 1 of each CSV row) and scrape it.
    with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv", 'r') as csv_file:
        csv_reading = csv.reader(csv_file)

        for line in csv_reading:
            links = line[1]
            try:
                driver.get(links)
            except Exception:
                # Navigation failed; pause before the lookups below.
                time.sleep(20)

            try:
                # Standard like count shown on photo posts.
                likes = driver.find_element_by_xpath(
                    """//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button""").text.split()[0]
                post_type = 'photo'
            except Exception:  # narrowed from a bare except:
                # Videos store the view count in a <span> instead.
                likes = driver.find_element_by_xpath(
                    """//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/span""").text.split()[0]
                post_type = 'video'

            age = driver.find_element_by_css_selector('a time').text
            comment = driver.find_element_by_xpath(
                """//*[@id="react-root"]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span""").text
            hashtags = find_hashtags(comment)
            mentions = find_mentions(comment)
            post_details = {'link': url, 'type': post_type, 'likes/views': likes,
                            'age': age, 'comment': comment, 'hashtags': hashtags,
                            'mentions': mentions}
            time.sleep(10)

            # Persist progress after every post. (Fixed NameError: the
            # original built the frame from the undefined name
            # `influencerposts` instead of `influencerpostsdata`.)
            influencerpostsdata.append(post_details)
            df = pd.DataFrame(influencerpostsdata)
            print(df)
            df.to_csv('influencerpostsdata.csv')

    driver.close()

标签: python、html、web-scraping、xpath、instagram

解决方案


不用担心,我已经解决了问题。

 # Accepted-answer version of the scrape loop: for each post URL in the
 # CSV, load the page and read likes, age, caption and account name via
 # absolute XPaths. Each lookup is wrapped in its own try/except so a
 # missing element yields None instead of aborting the whole run.
 # NOTE(review): assumes `driver` is an already-logged-in Selenium
 # WebDriver and `influencerpostsdata` is a list created earlier --
 # confirm against the surrounding script.
 with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
    csv_reading = csv.reader(csv_file)

    for line in csv_reading:
        # Column 1 of each CSV row holds the post link.
        links = line[1]
        try:
            Page = driver.get(links)
        except Exception as e:
            # Navigation failed; pause before attempting the lookups.
            Page = None
            time.sleep(20)

        # Like/view count element.
        try:    
            likes = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
        except Exception as e:
            likes = None
        try:
            likes2 = likes.text
        except Exception as e:
            # Element was not found above (likes is None); record None.
            likes2 = None
            time.sleep(20)    
        # Post age from the <time> element.
        try:    
            age = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')
        except Exception as e:
            age = None
        try:
            age2 = age.text
        except Exception as e:
            age2 = None
            time.sleep(20)  
            
        # Caption text span.
        try:    
            caption = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[1]/ul/div/li/div/div/div[2]/span')    
        except Exception as e:
            caption = None
        try:
            caption2 = caption.text
        except Exception as e:
            caption2 = None
            time.sleep(20)   
            
        # Poster's account name from the post header.
        try:
            AccountName = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/header/div[2]/div[1]/div/span/a')
        except Exception as e:
            AccountName = None
        try:
            AccountName2 = AccountName.text
        except Exception as e:
            AccountName2 = None
            time.sleep(20)
            
        post_details = {'Username': AccountName2,'Caption': caption2, 'Likes/Views': likes2,
                        'Age': age2 }
        

        # Append this post and rewrite the output CSV with all rows so far.
        influencerpostsdata.append(post_details)
        df = pd.DataFrame(influencerpostsdata)
        print(df)
        df.to_csv('influencerpostsdata.csv')
        
driver.close() 

推荐阅读