首页 > 解决方案 > python列表只打印到csv中的一行

问题描述

我希望将列表打印到每一列和每一行。当它打印时,所有数据都被塞进 csv 文件的一行中。我应该为 csv 文件创建一个循环以逐个添加列表项吗?

def write_output(data):
    """Write store rows to ``data.csv`` in the current working directory.

    A fixed 13-column header record is written first, followed by one CSV
    record per item of *data*. Every field is quoted (``csv.QUOTE_ALL``).

    Args:
        data: Iterable of rows; each row is an iterable of field values.
    """
    header = ["locator_domain", "location_name", "street_address", "city", "state", "zip", "country_code",
              "store_number", "phone", "location_type", "latitude", "longitude", "hours_of_operation"]

    # newline='' hands line-ending control to the csv module. Without it,
    # platforms that translate newlines on write (e.g. Windows) end up with
    # an empty line after every record.
    with open('data.csv', mode='w', newline='') as output_file:
        writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows(data)


def fetch_data():
    """Download http://leevers.com/ and collect its store details.

    The text of the 1st, 2nd, 3rd and 4th ``<p>`` child of every ``<div>``
    is gathered into four lists (names, addresses, cities, phones).

    Returns:
        A list holding exactly ONE 13-element record. The name, address,
        city and phone positions each contain a whole list of values (one
        entry per matched element) rather than a single value, so writing
        the result with ``csv.writer.writerow`` yields a single data row.
        Positions with no data hold the placeholder ``'<MISSING>'``.
    """
    base_url = 'http://leevers.com/'
    response = requests.get(base_url)
    soup = BeautifulSoup(response.text, 'lxml')

    def texts(selector):
        # Text content of every element matched by the CSS selector.
        return [tag.text for tag in soup.select(selector)]

    name = texts('div > p:nth-of-type(1)')
    address = texts('div > p:nth-of-type(2)')
    city = texts('div > p:nth-of-type(3)')
    phone = texts('div > p:nth-of-type(4)')

    # Field order follows the CSV header: locator_domain, location_name,
    # street_address, city, state, zip, country_code, store_number, phone,
    # location_type, latitude, longitude, hours_of_operation.
    store = [
        base_url,
        name,
        address,
        city,
        '<MISSING>',
        '<MISSING>',
        'US',
        '<MISSING>',
        phone,
        '<MISSING>',
        '<MISSING>',
        '<MISSING>',
        '<MISSING>',
    ]
    return [store]

标签: csv parsing web-scraping beautifulsoup

解决方案


按现有写法不会逐行写入,因为您实际上是按列构建列表。我相信可以用 zip 将其转置,但您也可以把每一“行”写入一个数据帧,然后用 pandas 将该数据帧写入文件:

import requests
from bs4 import BeautifulSoup
import pandas as pd

def write_output(data):
    """Save *data* as ``data.csv`` in the current working directory.

    Args:
        data: Object exposing a pandas-style ``to_csv`` method (e.g. a
            ``DataFrame``). Its index is not written to the file.
    """
    output_path = 'data.csv'
    data.to_csv(output_path, index=False)



def fetch_data():
    """Scrape the store locations listed on http://leevers.com/.

    Every ``<div class="border">`` element is treated as one store whose
    first four ``<p>`` descendants hold, in order: the store name, the
    street address, a "City, ST ZIP" line, and the phone number.

    Returns:
        pandas.DataFrame with one row per store, a 0..n-1 integer index and
        the 13 columns listed in ``columns`` below. Fields the page does
        not provide are filled with ``'<MISSING>'``. When no store element
        is found the frame has the 13 columns and zero rows.

    Raises:
        IndexError: A store element has fewer than four ``<p>`` tags.
        ValueError: The third paragraph is not of the form "City, ST ZIP".
    """
    columns = ["locator_domain", "location_name", "street_address", "city", "state", "zip", "country_code",
               "store_number", "phone", "location_type", "latitude", "longitude", "hours_of_operation"]

    base_url = 'http://leevers.com/'
    r = requests.get(base_url)
    soup = BeautifulSoup(r.text, 'lxml')

    # Collect plain row lists and build the frame once at the end.
    # DataFrame.append was removed in pandas 2.0, and calling it inside a
    # loop copied the whole frame on every iteration.
    rows = []
    for location in soup.find_all('div', {'class': 'border'}):
        paragraphs = location.find_all('p')

        name = paragraphs[0].text
        address = paragraphs[1].text
        # Split on the LAST comma so a comma inside the city part is kept;
        # whitespace-split so repeated spaces between state and zip are fine.
        city, state_zip = paragraphs[2].text.rsplit(',', 1)
        state, zip_code = state_zip.split()
        phone = paragraphs[3].text

        rows.append([base_url, name, address, city, state, zip_code, 'US', '<MISSING>',
                     phone, '<MISSING>', '<MISSING>', '<MISSING>', '<MISSING>'])

    return pd.DataFrame(rows, columns=columns)

# Script entry: fetch the store data, then hand it to write_output to be saved.
data = fetch_data()
write_output(data)

输出:

print (df.to_string())
         locator_domain          location_name        street_address           city state    zip country_code store_number             phone location_type   latitude  longitude hours_of_operation
0   http://leevers.com/  Colorado Ranch Market   11505 E. Colfax Ave         Aurora    CO  80010           US    <MISSING>  PH: 720-343-2195     <MISSING>  <MISSING>  <MISSING>          <MISSING>
1   http://leevers.com/             Save-A-Lot    4255 W Florida Ave         Denver    CO  80219           US    <MISSING>  PH: 303-935-0880     <MISSING>  <MISSING>  <MISSING>          <MISSING>
2   http://leevers.com/             Save-A-Lot      15220 E. 6th Ave         Aurora    CO  80011           US    <MISSING>  PH: 720-343-2011     <MISSING>  <MISSING>  <MISSING>          <MISSING>
3   http://leevers.com/             Save-A-Lot      3045 W. 74th Ave    Westminster    CO  80030           US    <MISSING>  PH: 303-339-2610     <MISSING>  <MISSING>  <MISSING>          <MISSING>
4   http://leevers.com/             Save-A-Lot    1110 Bonforte Blvd         Pueblo    CO  81001           US    <MISSING>  PH: 719-544-6057     <MISSING>  <MISSING>  <MISSING>          <MISSING>
5   http://leevers.com/             Save-A-Lot          698 Peria St         Aurora    CO  80011           US    <MISSING>  PH: 303-365-0393     <MISSING>  <MISSING>  <MISSING>          <MISSING>
6   http://leevers.com/             Save-A-Lot         4860 Pecos St         Denver    CO  80221           US    <MISSING>  PH: 720-235-3900     <MISSING>  <MISSING>  <MISSING>          <MISSING>
7   http://leevers.com/             Save-A-Lot      2630 W. 38th Ave         Denver    CO  80211           US    <MISSING>  PH: 303-433-4405     <MISSING>  <MISSING>  <MISSING>          <MISSING>
8   http://leevers.com/             Save-A-Lot       405 S Circle Dr  Colo. Springs    CO  80910           US    <MISSING>  PH: 719-520-5620     <MISSING>  <MISSING>  <MISSING>          <MISSING>
9   http://leevers.com/             Save-A-Lot       1750 N. Main St       Longmont    CO  80501           US    <MISSING>  PH: 720-864-8060     <MISSING>  <MISSING>  <MISSING>          <MISSING>
10  http://leevers.com/             Save-A-Lot       630 W. 84th Ave       Thornton    CO  80260           US    <MISSING>  PH: 303-468-6290     <MISSING>  <MISSING>  <MISSING>          <MISSING>
11  http://leevers.com/             Save-A-Lot  1951 S. Federal Blvd         Denver    CO  80219           US    <MISSING>  PH: 303-407-0430     <MISSING>  <MISSING>  <MISSING>          <MISSING>
12  http://leevers.com/             Save-A-Lot        7290 Manaco St  Commerce City    CO  80022           US    <MISSING>  PH: 303-288-1747     <MISSING>  <MISSING>  <MISSING>          <MISSING>
13  http://leevers.com/             Save-A-Lot    6601 W. Colfax Ave       Lakewood    CO  80214           US    <MISSING>  PH: 303-468-6290     <MISSING>  <MISSING>  <MISSING>          <MISSING>
14  http://leevers.com/             Save-A-Lot           816 25th St        Greeley    CO  80631           US    <MISSING>  PH: 970-356-7498     <MISSING>  <MISSING>  <MISSING>          <MISSING>

推荐阅读