python - 无法使用 Python 将数据写入 Excel 文件(多个选项卡)
问题描述
我不太熟悉使用 Python 以 Excel 格式写入数据,需要一些帮助才能将我的数据输出写入具有多个选项卡的单个 .xlsx (Excel) 文件。
我的代码在这里给出:
import time
import requests
import random
from lxml import html
from bs4 import BeautifulSoup
import xlsxwriter
def write_to_file(file, mode, data, newline=None, with_tab=None):
with open(file, mode, encoding='utf-8') as l:
if with_tab == True:
data = '\t'.join(data)
if newline == True:
data = data+'\n'
l.write(data)
link = ["http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=®istryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber="+str(var)+"&nextList=Next%C2%A0%3E&selectedPeriods=" for var in range(17500)] # This will read the URL's line by line as per specific value of var.
start = 1
end = 20
for pagenum, links in enumerate(link[start:end]):
print(links)
r = requests.get(links)
time.sleep(random.randint(2,5))
soup = BeautifulSoup(r.content,"lxml")
# Table 2
for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
print(dataset)
write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
write_to_file('Table3.tsv', 'a', links)
# Table 3
for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
print(dataset)
write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
write_to_file('Table3.tsv', 'a', links)
#workbook = xlsxwriter.Workbook('Table3.xlsx')
#worksheet = workbook.add_worksheet("Table 3")
#worksheet.write(dataset)
#workbook.close()
我需要在多个选项卡(如表 1 选项卡和表 2 选项卡)中的 .xlsx Excel 工作表中的输出,目前我正在以 .tsv 格式获取数据。我试过 xlsxwriter 但无法得到结果,所以评论了这些行。请帮忙
解决方案
您需要首先创建两个工作表,并跟踪要用于每个工作表的当前行。然后,一个append_row()
函数可以将一行数据添加到所需的工作表中。
import time
import requests
import random
from lxml import html
from bs4 import BeautifulSoup
import xlsxwriter
def append_row(ws, row):
for col, value in enumerate(row):
ws.write_string(ws.cur_row, col, value)
ws.cur_row += 1
workbook = xlsxwriter.Workbook('output.xlsx')
ws_2 = workbook.add_worksheet("Table 2")
ws_3 = workbook.add_worksheet("Table 3")
# Keep a track of the row to use in each worksheet
ws_2.cur_row = 0
ws_3.cur_row = 0
start = 1
end = 3
link = "http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=®istryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber={}&nextList=Next%C2%A0%3E&selectedPeriods="
for page_number in range(start, end):
print("Page {}".format(page_number))
url = link.format(page_number)
r = requests.get(url)
time.sleep(random.randint(2, 5))
soup = BeautifulSoup(r.content, "lxml")
# Table 2
for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
append_row(ws_2, [url] + dataset])
# Table 3
for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
append_row(ws_3, [url] + dataset])
workbook.close()
推荐阅读
- javascript - TicTacToe MinMax 算法不返回最佳移动
- ruby-on-rails-5 - Dropbox 目录重命名导致资产预编译时出现“URI::InvalidURIError: bad URI”
- mysql - 我不能使用 INTERSECT 运算符
- node.js - 更新一个字段,取决于另一个带有猫鼬的字段
- java - 主启动类无效,缺少主方法
- swift - 使用 Alamofire 5 上传文件到 RocketChat 服务器
- python - 与bash md5sum相比,Python hashlib md5输出不同且速度慢?
- c# - 清理帖子条目!ASP.NET API
- javascript - 如何从 HTML 获取信息到 Javascript 但已连接到 API
- azure-cosmosdb - 如何通过 SDK 或 Select QUERY 在 CosmosBD 中使用分区键