首页 > 解决方案 > Web 抓取到 CSV - ValueError 无法将输入数组从形状 (2) 广播到形状 (1)





我正在尝试把抓取到的数据导出到 CSV 文件,但失败了。

我收到错误 - ValueError - 无法将输入数组从形状 (2) 广播到形状 (1)。

我查阅了一些资料,其中提到这个错误可能是因为把二维数组塞进了一维数组。但我检查了自己的代码,看不出是在哪里这样做的。




from bs4 import BeautifulSoup
import requests
import urllib.request
import csv
import pandas
from pandas import DataFrame
import re

#csv creation
# Opens Guntrader_Dealer.csv in write mode and builds a csv.DictWriter over the 17 output columns.
# NOTE(review): the two statements after `with` are not indented as shown here, and Python
# requires an indented body after `with` — presumably the indentation was lost when the snippet
# was pasted; confirm against the original script.
# NOTE(review): `writer` is only constructed; nothing in the visible code writes a header or
# any row through it.
with open('Guntrader_Dealer.csv', mode='w') as csv_file:
fieldnames = ['Title', 'Make', 'Model', 'Licence', 'Orientation', 'Barrel Length', 'Stock Length', 'Chokes', 'Origin', 'Trigger', 'Ejection', 'Scope', 'Serial No', 'Stock No', 'Condition', 'Description', 'Price']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

#all links list
# NOTE(review): no statement in the visible code creates `all_links`, although the loop below
# appends to it — presumably an `all_links = []` line belonged here; confirm.

#grab all links which contain the href specified
# For page numbers 1 and 2, appends to all_links the absolute URL of every anchor whose href
# contains "dealers/minsterley/minsterley-ranges/guns/".
# NOTE(review): `soup` is not assigned before this loop in the visible code and `page` is never
# used inside it — presumably the lines that download and parse each listing page are missing.
for page in range(1,3):
 for link in soup.select('a[href*="dealers/minsterley/minsterley-ranges/guns/"]'):
  all_links.append("https://www.guntrader.uk" + link['href'])

# For every collected link: download the page, look up each labelled field in the HTML and
# build a one-row DataFrame from the results.
for a_link in all_links:

#Defining the span text in GunDetails lookups
    # Each predicate below is passed to soup.find(); it is true for a <span> element whose
    # parent's first child contains the given label text (e.g. 'Make:').
    # NOTE(review): none of these functions use a_link, yet they are re-defined on every
    # iteration of the loop.
    def make_span(make):
       return make.name=='span' and 'Make:' in make.parent.contents[0]    
    def model_span(model):
       return model.name=='span' and 'Model:' in model.parent.contents[0]
    def licence_span(licence):
       return licence.name=='span' and 'Licence:' in licence.parent.contents[0]
    def orient_span(orient):
       return orient.name=='span' and 'Orient.:' in orient.parent.contents[0]    
    def barrel_span(barrel):
      return barrel.name=='span' and 'Barrel:' in barrel.parent.contents[0]
    def stock_span(stock):
      return stock.name=='span' and 'Stock:' in stock.parent.contents[0]    
    def choke_span(choke):
      return choke.name=='span' and 'Chokes:' in choke.parent.contents[0]
    def origin_span(origin):
      return origin.name=='span' and 'Origin:' in origin.parent.contents[0]
    def trigger_span(trigger):
      return trigger.name=='span' and 'Trigger:' in trigger.parent.contents[0]
    def ejection_span(ejection):
      return ejection.name=='span' and 'Ejection:' in ejection.parent.contents[0]
    def serial_span(serial):
      return serial.name=='span' and 'Serial #:' in serial.parent.contents[0]
    def stockno_span(stockno):
      return stockno.name=='span' and 'Stock #:' in stockno.parent.contents[0]
    def condition_span(condition):
      return condition.name=='span' and 'Condition:' in condition.parent.contents[0]
    def scope_span(scope):
      return scope.name=='span' and 'Scope:' in scope.parent.contents[0]

    # Download the detail page for this link and parse it.
    res = urllib.request.urlopen(a_link)
    soup = BeautifulSoup(res, 'html.parser')

#soup searches using the define criteria
    # Each field falls back to the string 'none' when no matching <span> is found.
    # NOTE(review): the first lookup reads `.content` (singular) while every other lookup reads
    # `.contents` — looks like a typo; confirm.
    # NOTE(review): per the Beautiful Soup documentation `.contents` is a list of child nodes,
    # not a string; presumably a span with two children is what produces the reported
    # "shape (2) into shape (1)" error once the values meet index=[0] below — confirm.

    makes = soup.find(make_span)
    gun_makes = makes.content if makes else 'none'
    models = soup.find(model_span)
    gun_models = models.contents if models else 'none'
    licences = soup.find(licence_span)
    gun_licences = licences.contents if licences else 'none'
    orients = soup.find(orient_span)
    gun_orients = orients.contents if orients else 'none'    
    barrels = soup.find(barrel_span)
    gun_barrels = barrels.contents if barrels else 'none'
    stocks = soup.find(stock_span)
    gun_stocks = stocks.contents if stocks else 'none'
    chokes = soup.find(choke_span)
    gun_chokes = chokes.contents if chokes else 'none'
    origins = soup.find(origin_span)
    gun_origins = origins.contents if origins else 'none'
    triggers = soup.find(trigger_span)
    gun_triggers = triggers.contents if triggers else 'none'
    ejections = soup.find(ejection_span)
    gun_ejections = ejections.contents if ejections else 'none'
    scopes = soup.find(scope_span)
    gun_scopes = scopes.contents if scopes else 'none'
    serials = soup.find(serial_span)
    gun_serials = serials.contents if serials else 'none'
    stocknos = soup.find(stockno_span)
    gun_stocknos = stocknos.contents if stocknos else 'none'
    conditions = soup.find(condition_span)
    gun_conditions = conditions.contents if conditions else 'none'

#title price and description
    # These three fields are located by CSS selector and read through `.text`, so they are
    # plain strings (or 'none' when the element is missing).
    title = soup.select_one('h1[itemprop="name"]')
    gun_title = title.text if title else 'none'
    price = soup.select_one('p.price')
    gun_price = price.text if price else 'none'
    description = soup.select_one('p[itemprop="description"]')
    gun_description = description.text if description else 'none'

    # Map every output column name to the value extracted above.
    data = { 'Title': gun_title, 'Make': gun_makes, 'Model': gun_models, 'Licence': gun_licences, 'Orientation': gun_orients, 'Barrel Length': gun_barrels, 'Stock Length': gun_stocks, 'Chokes': gun_chokes, 'Origin': gun_origins, 'Trigger': gun_triggers, 'Ejection': gun_ejections, 'Scope': gun_scopes, 'Serial No': gun_serials, 'Stock No': gun_stocknos, 'Condition': gun_conditions, 'Description': gun_description, 'Price': gun_price}

    # Build a single-row frame (index=[0]) with the columns in output order.
    # NOTE(review): within the visible code `df` is rebuilt on every iteration and is neither
    # accumulated nor written to the CSV file.
    df = DataFrame(data, columns = ['Title', 'Make', 'Model', 'Licence', 'Orientation', 'Barrel Length', 'Stock Length', 'Chokes', 'Origin', 'Trigger', 'Ejection', 'Scope', 'Serial No', 'Stock No', 'Condition', 'Description', 'Price'], index=[0])


标签: python, pandas, web-scraping, beautifulsoup, python-requests


我已经为你写好了脚本。我没有让每次循环生成的 df 反复覆盖写入同一个文件,而是创建了一个主 DataFrame(df_main),在 for 循环中把每个 df 追加进去,循环结束后再一次性写入 CSV。


from bs4 import BeautifulSoup
import requests
import csv
import pandas
from pandas import DataFrame
import re
import os
import locale
os.environ["PYTHONIOENCODING"] = "utf-8"

# Create (or truncate) the output file and bind a DictWriter to it.
# Only the writer object is constructed here; this block writes no header and no rows.
fieldnames = [
    'Title', 'Make', 'Model', 'Licence', 'Orientation',
    'Barrel Length', 'Stock Length', 'Chokes', 'Origin',
    'Trigger', 'Ejection', 'Scope', 'Serial No', 'Stock No',
    'Condition', 'Description', 'Price',
]
with open('Guntrader_Dealer.csv', mode='w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)


#grab all links which contain the href specified
# For page numbers 1 and 2, appends to all_links the absolute URL of every anchor whose href
# contains "dealers/minsterley/minsterley-ranges/guns/".
# NOTE(review): neither `all_links` nor `soup` is assigned before this point in the visible
# script, and `page` is not used inside the loop — presumably the lines that create the list
# and download/parse each listing page are missing; restore them before running.
for page in range(1,3):
    for link in soup.select('a[href*="dealers/minsterley/minsterley-ranges/guns/"]'):
        all_links.append("https://www.guntrader.uk" + link['href'])

# Column order of the output CSV.
COLUMNS = [
    'Title', 'Make', 'Model', 'Licence', 'Orientation',
    'Barrel Length', 'Stock Length', 'Chokes', 'Origin',
    'Trigger', 'Ejection', 'Scope', 'Serial No', 'Stock No',
    'Condition', 'Description', 'Price',
]

# Output column -> label text that sits in front of the value <span> on a detail page.
SPAN_LABELS = {
    'Make': 'Make:',
    'Model': 'Model:',
    'Licence': 'Licence:',
    'Orientation': 'Orient.:',
    'Barrel Length': 'Barrel:',
    'Stock Length': 'Stock:',
    'Chokes': 'Chokes:',
    'Origin': 'Origin:',
    'Trigger': 'Trigger:',
    'Ejection': 'Ejection:',
    'Scope': 'Scope:',
    'Serial No': 'Serial #:',
    'Stock No': 'Stock #:',
    'Condition': 'Condition:',
}

# Output column -> CSS selector, for the fields that are located by selector.
SELECTORS = {
    'Title': 'h1[itemprop="name"]',
    'Price': 'p.price',
    'Description': 'p[itemprop="description"]',
}


def _label_span(label):
    """Return a ``soup.find`` predicate for the <span> that follows *label*.

    The predicate is true for a <span> element whose parent's first child
    contains *label* (for example ``'Make:'``).
    """
    def matches(tag):
        return tag.name == 'span' and label in tag.parent.contents[0]
    return matches


def _text_or_none(tag):
    """Return the text of *tag* as one string, or ``'none'`` if it was not found.

    ``get_text()`` is used instead of ``.contents``: ``.contents`` is a list of
    child nodes, and a list whose length is not exactly 1 cannot be placed in a
    single DataFrame row (the "shape (2) into shape (1)" ValueError).
    """
    return 'none' if tag is None else tag.get_text()


def _scrape_gun(page):
    """Build one CSV row (column name -> text) from a parsed detail page."""
    row = {
        column: _text_or_none(page.find(_label_span(label)))
        for column, label in SPAN_LABELS.items()
    }
    for column, selector in SELECTORS.items():
        row[column] = _text_or_none(page.select_one(selector))
    return row


# Collect one dict per page and build the frame once at the end;
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
rows = []
for a_link in all_links:
    # A timeout keeps one unresponsive page from hanging the whole run.
    res = requests.get(a_link, timeout=30)
    soup = BeautifulSoup(res.text, 'html.parser')
    rows.append(_scrape_gun(soup))

# `columns` fixes the column order; the default 0..n-1 index matches what
# append(..., ignore_index=True) used to produce.
df_main = DataFrame(rows, columns=COLUMNS)
df_main.to_csv('Guntrader_Dealer.csv', encoding='UTF-8')
