Selenium web scraping to an Excel file: no data is exported to the Excel output file. Why?

Problem description

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import time
import pandas as pd 
from pandas import ExcelWriter

# initial value for table data 
total = []
result = []

#initial setting for excel file
wb = Workbook()
ws = wb.create_sheet()
filename = '/Users/sungyeon/Desktop/projects/text.xlsx'
writer = pd.ExcelWriter(filename)

#setting of crawling
driver = webdriver.Chrome('./chromedriver')
target_url = 'https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05'
driver.get(target_url)

# selection of first dropbox
select1 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
select1.options[0].click()
# selection of second dropbox
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))

# for loop for values of second dropbox
for i in range(0,3):
    try:
        select2.options[i].click()
        title = select2.options[i].text
        wb.create_sheet(title)
        driver.implicitly_wait(5)
        driver.find_element_by_class_name('btn_search').click()


# in case of stale element reference error 
    except StaleElementReferenceException as e:
        select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
        select2.options[i].click()
        title = select2.options[i].text
        wb.create_sheet(title)
        driver.find_element_by_class_name('btn_search').click()
        driver.implicitly_wait(5)

# setting of table data from crawled webpage 
driver.current_url
table = driver.find_element_by_class_name('tbl_box')
tbody = table.find_element_by_tag_name('tbody')
rows=tbody.find_elements_by_tag_name('tr')

# making lists of data from crawled data    
for index, value in enumerate(rows):
    body = value.find_elements_by_tag_name('td')
    print('ok5')
    for i in range(len(body)):
        try: 
            data = body[i].text
            result.append(data)

        except StaleElementReferenceException as e:
            body = value.find_elements_by_tag_name('td')
            continue

    # adding data of a row to list of final dataset
    total.append(result)
    # clearing temp list
    result=[]
    time.sleep(2)   

# to create a new sheet named after the name of dropbox value 
# forming dataframe from list of dataset
    df = pd.DataFrame.from_records(total)
# converting to excel file into the sheet named after the name of dropbox value
    df.to_excel(writer, sheet_name = title)
writer.save()
wb.save(filename=filename)

I have tried web scraping with Selenium. The data-collection part works fine, but the problem is that no data is exported to the Excel file. The file and the worksheets are created just fine, yet no data is written to any of the sheets. I guess it may be related to indentation, but I can't figure it out. Any hint would be greatly appreciated! Many thanks!
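
One way to narrow this down is to check whether the pandas Excel export works on its own, separate from the scraping. A minimal standalone sketch (the path and sheet name below are placeholders, not taken from the script):

import pandas as pd

# write a tiny DataFrame to a named sheet; ExcelWriter used as a context manager
# saves the file automatically when the block exits
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
with pd.ExcelWriter('/tmp/check.xlsx') as writer:
    df.to_excel(writer, sheet_name='test')

If the two rows show up in /tmp/check.xlsx, the export step itself is fine and the data is being lost before it reaches the DataFrame.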

Tags: python, selenium, web-scraping

Solution


Try waiting for the element to appear first, by placing the wait before you locate the element:

select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.implicitly_wait(5)   # set the wait before locating the button so it has time to appear
driver.find_element_by_class_name('btn_search').click()
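
If the sheets are still empty after adding the wait, the structure of the script is also worth checking: the table is read only once, after the for loop over the dropdown has finished, and the final wb.save(filename=filename) writes the empty openpyxl sheets over the same file that writer.save() just produced. Below is a rough, untested sketch of how the loop could be restructured so that each option's table is scraped and written to its own sheet inside the loop (the locators and the 2-second pause are taken from the question; the explicit waits are additions, and the openpyxl Workbook is dropped since df.to_excel creates the sheets itself):

with pd.ExcelWriter(filename) as writer:
    for i in range(0, 3):
        # re-locate the dropdown on every pass to avoid stale element references
        select2 = Select(WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
        select2.options[i].click()
        title = select2.options[i].text

        # wait for the search button, click it, then wait for the result table
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'btn_search'))).click()
        table = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'tbl_box')))

        # collect this option's rows into a list of lists
        total = []
        for row in table.find_elements_by_tag_name('tr'):
            cells = [td.text for td in row.find_elements_by_tag_name('td')]
            if cells:
                total.append(cells)

        # write this option's rows to a sheet named after the dropdown value
        pd.DataFrame.from_records(total).to_excel(writer, sheet_name=title)
        time.sleep(2)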
