python - 如何修改此代码,使其在循环内(而不是在循环结束后)写入 CSV?
问题描述
我有一段使用 pandas 创建数据框(DataFrame)并将结果输出到 CSV 的代码。我希望它在循环的每次迭代中都把结果写入 CSV,这样即使发生错误(例如连接丢失),我仍然能保留已经抓取到的部分结果。
import requests
from googlesearch import search
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time
import os
# Look up every film title from the input CSV on IMDb (via a Google search),
# collect the page URL, title, genres and summary text, and save the result to
# movieDetails.csv.
#
# NOTE: the CSV is written only once, after the loop has finished, so an error
# part-way through (for example a lost connection) loses everything collected
# so far.
start_time = time.time()

# Parallel accumulators: position i in each list describes the same film.
emptyWebPageSet = []  # URL of the scraped page
emptySetTitle = []    # title text taken from the page's <h1>
emptysetGenre = []    # list of genre strings for the film
infoSet = []          # summary text for the film
date = []

colnames = ['title']
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
# The file is read with header=None, so its first row is data as far as pandas
# is concerned; it is dropped here (presumably it is the header line - confirm).
my_list = list(data["title"])[1:]
length = len(my_list)

# enumerate gives the 1-based position directly; list.index() would rescan the
# list on every iteration and report the wrong position for duplicate titles.
for filmIndex, film in enumerate(my_list, start=1):
    query = film + " imdb"
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):
        page = requests.get(j)
        if page.status_code != 200:
            continue

        soup = BeautifulSoup(page.content, "lxml")

        # Gather every field before touching the accumulators so that the
        # lists can never get out of step with each other.
        heading = soup.find("h1")
        if heading is None or not heading.contents:
            # No usable title on this page: nothing to record for this result.
            continue
        filmtitle = heading.contents[0].strip()

        summary_div = soup.find("div", {"class": "summary_text"})
        summaryText = summary_div.string if summary_div is not None else None

        genres = []
        for subtext in soup.find_all("div", {"class": "subtext"}):
            anchors = subtext.find_all('a')
            # The last link of each block is deliberately left out
            # (presumably it is not a genre - confirm against the page markup).
            genres.extend(anchor.string for anchor in anchors[:-1])

        print(str(filmIndex) + " " + str(filmtitle))
        emptyWebPageSet.append(j)
        emptySetTitle.append(filmtitle)
        emptysetGenre.append(genres)
        infoSet.append(summaryText)

# First genre of each film; '' when the page yielded no genres at all
# (indexing [0] unconditionally would raise IndexError and abort the export).
lst1 = [film_genres[0] if film_genres else '' for film_genres in emptysetGenre]
df = pandas.DataFrame({"imdbPage": emptyWebPageSet,
                       "title": emptySetTitle,
                       "genre1": lst1,
                       "info": infoSet
                       })
df.to_csv("movieDetails.csv", encoding='utf-8', index=False)
解决方案
只需缩进最后一部分,使其位于循环内部,这样每次迭代都会写入 CSV。
import requests
from googlesearch import search
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time
import os
# Look up every film title from the input CSV on IMDb (via a Google search),
# collect the page URL, title, genres and summary text, and save the result to
# movieDetails.csv.
#
# The CSV export sits inside the loop: the file is rewritten after every film
# that was scraped successfully, so a later error (for example a lost
# connection) still leaves the results gathered so far on disk.
start_time = time.time()

# Parallel accumulators: position i in each list describes the same film.
emptyWebPageSet = []  # URL of the scraped page
emptySetTitle = []    # title text taken from the page's <h1>
emptysetGenre = []    # list of genre strings for the film
infoSet = []          # summary text for the film
date = []

colnames = ['title']
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
# The file is read with header=None, so its first row is data as far as pandas
# is concerned; it is dropped here (presumably it is the header line - confirm).
my_list = list(data["title"])[1:]
length = len(my_list)

# enumerate gives the 1-based position directly; list.index() would rescan the
# list on every iteration and report the wrong position for duplicate titles.
for filmIndex, film in enumerate(my_list, start=1):
    query = film + " imdb"
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):
        page = requests.get(j)
        if page.status_code != 200:
            continue

        soup = BeautifulSoup(page.content, "lxml")

        # Gather every field before touching the accumulators so that the
        # lists can never get out of step with each other.
        heading = soup.find("h1")
        if heading is None or not heading.contents:
            # No usable title on this page: nothing to record for this result.
            continue
        filmtitle = heading.contents[0].strip()

        summary_div = soup.find("div", {"class": "summary_text"})
        summaryText = summary_div.string if summary_div is not None else None

        genres = []
        for subtext in soup.find_all("div", {"class": "subtext"}):
            anchors = subtext.find_all('a')
            # The last link of each block is deliberately left out
            # (presumably it is not a genre - confirm against the page markup).
            genres.extend(anchor.string for anchor in anchors[:-1])

        print(str(filmIndex) + " " + str(filmtitle))
        emptyWebPageSet.append(j)
        emptySetTitle.append(filmtitle)
        emptysetGenre.append(genres)
        infoSet.append(summaryText)

        # Export everything collected so far. First genre of each film; ''
        # when the page yielded no genres at all (indexing [0] unconditionally
        # would raise IndexError and abort the export).
        lst1 = [film_genres[0] if film_genres else '' for film_genres in emptysetGenre]
        df = pandas.DataFrame({"imdbPage": emptyWebPageSet,
                               "title": emptySetTitle,
                               "genre1": lst1,
                               "info": infoSet
                               })
        df.to_csv("movieDetails.csv", encoding='utf-8', index=False)
推荐阅读
- java - .getSelectedItem() 在组合框中不起作用
- drupal - Drupal 8 背后的 Kong
- javascript - 为什么JS函数不起作用?
- c++ - 复制(显式)初始化与直接(隐式)初始化 C++
- nuxt.js - Nuxt.js 不接受问题报告?
- asp.net-core - 发布依赖于第三方 dll 的框架
- javascript - 如何使用 ajax 在同一个 servlet 中实现 doPost() 和 doGet() 请求?
- reactjs - onClick 循环匹配 API 属性/产品 - ReactJS
- c# - 使用代码将材质应用于 Unity 中的所有其他对象
- javascript - 为什么 MobX 不能用作状态容器?