How do I edit this code so that it writes to the CSV inside the loop rather than at the end?

Problem description

I have some code that uses pandas to create a dataframe and then writes the result out to a csv. What I would like it to do is write to the csv on every iteration of the loop, so that if something goes wrong, for example the connection drops, I still have partial results.
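In outline, that is the pattern below: open the output file once, write one row per iteration, and flush it, so everything written so far survives a crash. This is only a minimal sketch to illustrate the goal; items and results.csv are placeholders, not names from the real script.

import csv

items = ["a", "b", "c"]                   # placeholder work queue

with open("results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["item", "result"])   # header once, up front
    for item in items:
        result = item.upper()             # stands in for the real scraping work
        writer.writerow([item, result])
        f.flush()                         # push the row to disk immediately

Here is the full code as it currently stands: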

import requests
from googlesearch import search 
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time


start_time = time.time()
emptyWebPageSet = []
emptySetTitle = []
emptysetGenre = []
infoSet = []
date = []
colnames = ['title']
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
my_list = data["title"]
my_list = list(my_list)
my_list = my_list[1:]
length = len(my_list)
for film in my_list:
    filmIndex = my_list.index(film) + 1
    query = film + " imdb"
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):

        page = requests.get(j)
        response = page.status_code
        if response == 200:
            soup = BeautifulSoup(page.content, "lxml")
            genreData = soup.find_all("div",{"class":"subtext"})
            summaryText = soup.find("div", {"class":"summary_text"})
            summaryText = summaryText.string
            infoSet.append(summaryText)
            filmtitle = soup.find("h1")
            filmtitle = filmtitle.contents[0].strip() 
            emptySetTitle.append(filmtitle)
            links = []
            genres = []
            for h in genreData:
                a = h.find_all('a')
                aLength = len(a)
                a1 = a[0]
                for b in range(0,aLength - 1):
                    r = a[b].string
                    genres.append(r)
                print (str(filmIndex) + " " + str(filmtitle))
                emptysetGenre.append(genres)
        emptyWebPageSet.append(j)
lst1 = [item[0] for item in emptysetGenre]
lst2 = [i[1] if len(i) > 1 else '' for i in emptysetGenre]
df = pandas.DataFrame({"imdbPage": emptyWebPageSet,
                       "title": emptySetTitle,
                       "genre1": lst1,
                       "info":infoSet
                       })
df.to_csv("movieDetails.csv", encoding='utf-8', index=False)

Tags: python

Solution


Just indent the last part, so that the DataFrame is rebuilt and written to the CSV inside the loop:

import requests
from googlesearch import search 
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time


start_time = time.time()
emptyWebPageSet = []
emptySetTitle = []
emptysetGenre = []
infoSet = []
date = []
colnames = ['title']
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
my_list = data["title"]
my_list = list(my_list)
my_list = my_list[1:]
length = len(my_list)
for film in my_list:
    filmIndex = my_list.index(film) + 1
    query = film + " imdb"
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):

        page = requests.get(j)
        response = page.status_code
        if response == 200:
            soup = BeautifulSoup(page.content, "lxml")
            genreData = soup.find_all("div",{"class":"subtext"})
            summaryText = soup.find("div", {"class":"summary_text"})
            summaryText = summaryText.string
            infoSet.append(summaryText)
            filmtitle = soup.find("h1")
            filmtitle = filmtitle.contents[0].strip() 
            emptySetTitle.append(filmtitle)
            links = []
            genres = []
            for h in genreData:
                a = h.find_all('a')
                aLength = len(a)
                a1 = a[0]
                for b in range(0,aLength - 1):
                    r = a[b].string
                    genres.append(r)
                print (str(filmIndex) + " " + str(filmtitle))
                emptysetGenre.append(genres)
        emptyWebPageSet.append(j)
        # Rebuild the DataFrame and overwrite the CSV on every iteration,
        # so partial results are kept if the connection drops mid-run.
        lst1 = [item[0] for item in emptysetGenre]
        lst2 = [i[1] if len(i) > 1 else '' for i in emptysetGenre]
        df = pandas.DataFrame({"imdbPage": emptyWebPageSet,
                               "title": emptySetTitle,
                               "genre1": lst1,
                               "info": infoSet
                               })
        df.to_csv("movieDetails.csv", encoding='utf-8', index=False)
