python - 尝试使用链接从 .csv 中抓取 Instagram 帖子数据 - 硕士论文
问题描述
我正在尝试从 .csv 中的链接集合中抓取 instagram 帖子数据(点赞数、标题、标签、提及和评论数),以进行数据分析,以用于我的硕士论文。但是,我遇到了一个无法找到xpath
or的错误。element
这是错误消息:
selenium.common.exceptions.NoSuchElementException:消息:没有这样的元素:无法找到元素:{"method":"xpath","selector":"//*[@id="react-root"]/section/main /div/div/article/div[2]/section[2]/div/div/button"}
这是我使用 selenium 编写的代码块:
def scrape_post_data():
influencerpostsdata = []
# Specify the path to chromedriver.exe
chromedriver_path = r"C:\\Users\\stuar\\Instagram Scraper\\ChromeDrivers\chromedriver.exe"
driver = webdriver.Chrome(executable_path=chromedriver_path)
time.sleep(2)
# Open the webpage
url = "https://www.instagram.com"
driver.get(url)
time.sleep(3)
# Alert number 1
time.sleep(5)
alert = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept All")]'))).click()
# Target Username Entry
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
# Enter Username and Password
login_username = str(enter_loginusername_entry.get())
login_password = str(enter_password_entry.get())
username.clear()
username.send_keys(login_username)
password.clear()
password.send_keys(login_password)
button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
# Alert number 2
time.sleep(5)
alert2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
# Alert number 3
time.sleep(5)
alert3 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
csv_reading = csv.reader(csv_file)
for line in csv_reading:
links = line[1]
try:
Page = driver.get(links)
except Exception as e:
Page = None
time.sleep(20)
try:
# This captures the standard like count.
likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button""").text.split()[0]
post_type = 'photo'
except:
# This captures the like count for videos which is stored
likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/span""").text.split()[0]
post_type = 'video'
age = driver.find_element_by_css_selector('a time').text
comment = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span""").text
hashtags = find_hashtags(comment)
mentions = find_mentions(comment)
post_details = {'link': url, 'type': post_type, 'likes/views': likes,
'age': age, 'comment': comment, 'hashtags': hashtags,
'mentions': mentions}
time.sleep(10)
#turning data into a .csv file
influencerpostsdata.append(post_details)
df = pd.DataFrame(influencerposts)
print(df)
df.to_csv('influencerpostsdata.csv')
driver.close()
解决方案
不用担心我已经解决了问题..
with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
csv_reading = csv.reader(csv_file)
for line in csv_reading:
links = line[1]
try:
Page = driver.get(links)
except Exception as e:
Page = None
time.sleep(20)
try:
likes = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
except Exception as e:
likes = None
try:
likes2 = likes.text
except Exception as e:
likes2 = None
time.sleep(20)
try:
age = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')
except Exception as e:
age = None
try:
age2 = age.text
except Exception as e:
age2 = None
time.sleep(20)
try:
caption = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[1]/ul/div/li/div/div/div[2]/span')
except Exception as e:
caption = None
try:
caption2 = caption.text
except Exception as e:
caption2 = None
time.sleep(20)
try:
AccountName = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/header/div[2]/div[1]/div/span/a')
except Exception as e:
AccountName = None
try:
AccountName2 = AccountName.text
except Exception as e:
AccountName2 = None
time.sleep(20)
post_details = {'Username': AccountName2,'Caption': caption2, 'Likes/Views': likes2,
'Age': age2 }
#turning data into a .csv file
influencerpostsdata.append(post_details)
df = pd.DataFrame(influencerpostsdata)
print(df)
df.to_csv('influencerpostsdata.csv')
driver.close()
推荐阅读
- python-3.x - 有没有办法遍历多个目录?
- augmented-reality - Reality Composer – 如何永远旋转对象?
- c - 如何从 .iso 中的同一文件运行 .bin 文件
- python - pygresql copy_from 给出关系不存在错误
- python - 从图像 PyTesseract 中读取数字
- reactjs - 如何使用 gatsby 将值传递到 GraphQL 查询并做出反应?
- pandas - 如何按类别绘制一维散点图/或在箱线图上添加点
- c# - 从现有数组的一部分创建新数组的好方法是什么?
- google-api - 尝试在 Google Calendar API 中插入 ACL 时出现“invalidEmail”错误
- sql - PostgreSQL:从多对多关系中检索数据