python - 如果满足条件,则从 CSV 文件中删除行
问题描述
我正在尝试抓取pickels.com.au。
如果链接相同并且价格不同,我正在尝试更新 pickels_dataset.csv 文件,我将删除列表并将新行插入到 CSV 文件中,但它不会从中删除旧条目CSV 文件。
删除和更新 CSV 文件中的行的最佳方法是什么。
下面是我的代码...
import requests
from scrapy.selector import Selector
import csv
import re
from tqdm import tqdm
from time import sleep
with open('pickels_dataset.csv', 'a+', newline='', encoding='utf-8') as auction_csv_file:
auction_csv_writer = csv.writer(auction_csv_file)
live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
auction_link = auctions.get('viewSaleListingLink')
if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
auction_request = requests.get(url=auction_link)
response = Selector(text=auction_request.text)
sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
if sales_id == []:
continue
auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
auction_sale_link_requests = requests.get(url=auction_sale_link)
auctions_data = auction_sale_link_requests.json().get('SearchResults')
if auctions_data == []:
print("NO RESULTS")
for auction_data in auctions_data:
if int(auction_data.get('MinimumBid')) > 0:
ids = auction_data.get('TargetId')
main_title = auction_data.get('Title')
short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
make = auction_data.get('M ake')
model = auction_data.get('Model')
variant = auction_data.get('Series')
transmission = auction_data.get('Transmission')
odometer = auction_data.get('Odometer')
state = auction_data.get('Location').get('State')
sale_price = auction_data.get('MinimumBid')
link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
sale_date = auction_data.get('SaleEndString')
auction_values = [
main_title, short_title, make,
model, variant, transmission, odometer,
state, "${:,.2f}".format(sale_price).strip() ,
link, sale_date
]
with open('pickels_dataset.csv', 'r+') as csv_read:
auction_reader = list(csv.reader(csv_read))
for each in auction_reader:
if link in each:
each_link, each_price = each[9], each[0]
if (link == each_link) and (sale_price != each_price):
auction_reader.clear()
print('New list found, old list deleted')
auction_csv_writer.writerow(auction_values)
print('New value added')
continue
elif (link == each[9]) and (sale_price == each[0]):
print('Same result already exist in the file')
continue
else:
auction_csv_writer.writerow(auction_values)
print('Unique result found and added.')
break
解决方案
您当前的脚本正在打开您的拍卖 CSV 文件以进行附加,然后在它仍处于打开状态时尝试再次打开以进行读取。这可能是它没有按预期更新的原因。
更好的方法是首先将现有保存的拍卖文件的全部内容读入字典。关键可能是link
这样可以很容易地确定您是否已经看过现有的拍卖。
接下来抓取当前的拍卖并saved_auctions
根据需要更新字典。
最后,将内容写saved_auctions
回 CSV 文件。
例如:
import requests
from scrapy.selector import Selector
import csv
import re
auction_filename = 'pickels_dataset.csv'
# Load existing auctions into a dictionary with link as key
saved_auctions = {}
with open(auction_filename, newline='', encoding='utf-8') as f_auction_file:
for row in csv.reader(f_auction_file):
saved_auctions[row[9]] = row # dictionary key is link
live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
auction_link = auctions.get('viewSaleListingLink')
if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
auction_request = requests.get(url=auction_link)
response = Selector(text=auction_request.text)
sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
if sales_id == []:
continue
auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
auction_sale_link_requests = requests.get(url=auction_sale_link)
auctions_data = auction_sale_link_requests.json().get('SearchResults')
if auctions_data == []:
print("NO RESULTS")
for auction_data in auctions_data:
if int(auction_data.get('MinimumBid')) > 0:
ids = auction_data.get('TargetId')
main_title = auction_data.get('Title')
short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
make = auction_data.get('Make')
model = auction_data.get('Model')
variant = auction_data.get('Series')
transmission = auction_data.get('Transmission')
odometer = auction_data.get('Odometer')
state = auction_data.get('Location').get('State')
minimum_bid = auction_data.get('MinimumBid')
sale_price = "${:,.2f}".format(minimum_bid).strip()
link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
sale_date = auction_data.get('SaleEndString')
auction_values = [
main_title, short_title, make,
model, variant, transmission, odometer,
state, sale_price,
link, sale_date
]
if link in saved_auctions:
if saved_auctions[link][8] == sale_price:
print('Same result already exists in the file')
else:
print('New value updated')
saved_auctions[link] = auction_values # Updated the entry
else:
print('New auction added')
saved_auctions[link] = auction_values
# Update the saved auction file
with open(auction_filename, 'w', newline='', encoding='utf-8') as f_auction_file:
csv_auction_file = csv.writer(f_auction_file)
csv_auction_file.writerows(saved_auctions.values())
如果您还想删除不再有效的拍卖,那么最好直接忽略保存的文件并按原样写入所有当前条目。
推荐阅读
- spotfire - 如何在spotfire中显示具有特定范围的饼图
- javascript - 水平平滑滚动的 scrollIntoView 替代方案
- airflow - 使用数据构建工具 (dbt) 路径 ['name'] 的 Cloud composer 出错:'jaffle_shop' 与 '^[^\\d\\W]\\w*$' 不匹配
- google-sheets - 如何解析 Google 表格查询(仅显示第一列)
- oracle - BI Publisher - 无法在数据模型中实现游标逻辑
- flutter - AutofillHints.email 没有按预期工作?
- python - 如何对分类系列进行分组(就像在 Tableau 中一样
- ssl - 如何解决打算由 LAN 托管的站点的自签名 SSL 证书问题
- azure - Unable to create PostgreSQL server with Azure CLI
- unity3d - Unity Shader 使 2D 对象变形