python - selenium web 抓取到 excel 文件:没有数据导出到 excel 输出文件。为什么?
问题描述
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import time
import pandas as pd
from pandas import ExcelWriter
# initial value for table data
total = []
result = []
#initial setting for excel file
wb = Workbook()
ws = wb.create_sheet()
filename = '/Users/sungyeon/Desktop/projects/text.xlsx'
writer = pd.ExcelWriter(filename)
#setting of crawling
driver = webdriver.Chrome('./chromedriver')
target_url = 'https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05'
driver.get(target_url)
# selection of first dropbox
select1 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
select1.options[0].click()
# selection of second dropbox
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
# for loop for values of second dropbox
for i in range(0,3):
try:
select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.implicitly_wait(5)
driver.find_element_by_class_name('btn_search').click()
# in case of stale element reference error
except StaleElementReferenceException as e:
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.find_element_by_class_name('btn_search').click()
driver.implicitly_wait(5)
# setting of table data from crawled webpage
driver.current_url
table = driver.find_element_by_class_name('tbl_box')
tbody = table.find_element_by_tag_name('tbody')
rows=tbody.find_elements_by_tag_name('tr')
# making lists of data from crawled data
for index, value in enumerate(rows):
body = value.find_elements_by_tag_name('td')
print('ok5')
for i in range(len(body)):
try:
data = body[i].text
result.append(data)
except StaleElementReferenceException as e:
body = value.find_elements_by_tag_name('td')
continue
# adding data of a row to list of final dataset
total.append(result)
# clearing temp list
result=[]
time.sleep(2)
# to create a new sheet named after the name of dropbox value
# forming dataframe from list of dataset
df = pd.DataFrame.from_records(total)
# converting to excel file into the sheet named after the name of dropbox value
df.to_excel(writer, sheet_name = title)
writer.save()
wb.save(filename=filename)
我尝试过使用 selenium 进行网络抓取。数据收集部分工作正常,但问题是我没有导出到 excel 文件的数据。文件,工作表创建做得很好,但我仍然没有在每张工作表上写入数据我猜它可能与缩进有关,但无法弄清楚。或任何提示将不胜感激!非常感谢!
解决方案
尝试通过在查找元素之前放置等待来等待元素首先出现。
select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.implicitly_wait(5)
driver.find_element_by_class_name('btn_search').click()
推荐阅读
- javascript - 使用nodejs将文件上传到s3
- spring-boot - 带有 Vault 的 Spring Cloud Config Server - Vault 覆盖本机配置文件中的任何值
- python - 将数据框的行保存到单独的 txt 文件中
- java - 无法理解如何在下面的课程中完成自动接线
- python - 如何解析两个粘在一起的元素?
- jasper-reports - 基于另一个字段的字段求和
- java - 将 Apache HttpClient 响应拦截器与请求匹配
- date - 我在 JSON 中保存了一个日期,当我重新加载它时,它看起来几乎一样......那是什么 `m=+2.58`?
- vue.js - Vuetify 删除 v-data-table 上的分页
- c# - 如何将 wacom 数位板集成到 WPF