首页 > 解决方案 > 无法使用 Python 将数据写入 Excel 文件(多个选项卡)

问题描述

我不太熟悉使用 Python 以 Excel 格式写入数据,需要一些帮助才能将我的数据输出写入具有多个选项卡的单个 .xlsx (Excel) 文件。

我的代码在这里给出:

import time
import requests
import random
from lxml import html 
from bs4 import BeautifulSoup
import xlsxwriter

def write_to_file(file, mode, data, newline=None, with_tab=None):  
    with open(file, mode, encoding='utf-8') as l:
        if with_tab == True:
            data = '\t'.join(data)
        if newline == True:
            data = data+'\n'
        l.write(data)

link = ["http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=&registryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber="+str(var)+"&nextList=Next%C2%A0%3E&selectedPeriods=" for var in range(17500)] # This will read the URL's line by line as per specific value of var.
start = 1 
end = 20 

for pagenum, links in enumerate(link[start:end]):
    print(links)
    r = requests.get(links)
    time.sleep(random.randint(2,5))
    soup = BeautifulSoup(r.content,"lxml")

# Table 2
    for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
        dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
        print(dataset)

        write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
        write_to_file('Table3.tsv', 'a', links)

# Table 3
    for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):

        dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
        print(dataset)

        write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
        write_to_file('Table3.tsv', 'a', links)

        #workbook = xlsxwriter.Workbook('Table3.xlsx')
        #worksheet = workbook.add_worksheet("Table 3")
        #worksheet.write(dataset)
        #workbook.close()

我需要在多个选项卡(如表 1 选项卡和表 2 选项卡)中的 .xlsx Excel 工作表中的输出,目前我正在以 .tsv 格式获取数据。我试过 xlsxwriter 但无法得到结果,所以评论了这些行。请帮忙

标签: pythonexcelbeautifulsoupxlsxwriter

解决方案


您需要首先创建两个工作表,并跟踪要用于每个工作表的当前行。然后,一个append_row()函数可以将一行数据添加到所需的工作表中。

import time
import requests
import random
from lxml import html 
from bs4 import BeautifulSoup
import xlsxwriter


def append_row(ws, row):
    for col, value in enumerate(row):
        ws.write_string(ws.cur_row, col, value)

    ws.cur_row += 1


workbook = xlsxwriter.Workbook('output.xlsx')
ws_2 = workbook.add_worksheet("Table 2")
ws_3 = workbook.add_worksheet("Table 3")

# Keep a track of the row to use in each worksheet
ws_2.cur_row = 0    
ws_3.cur_row = 0        

start = 1 
end = 3
link = "http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=&registryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber={}&nextList=Next%C2%A0%3E&selectedPeriods="

for page_number in range(start, end):
    print("Page {}".format(page_number))
    url = link.format(page_number)
    r = requests.get(url)

    time.sleep(random.randint(2, 5))
    soup = BeautifulSoup(r.content, "lxml")

    # Table 2
    for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
        dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
        append_row(ws_2, [url] + dataset])

    # Table 3
    for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):
        dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
        append_row(ws_3, [url] + dataset])

workbook.close()    

推荐阅读