python - Python csv 导出限制为 8KB?
问题描述
有人能告诉我这段代码有什么问题吗?它基于一个旧项目,我正在尝试实现多个 URL 而不仅仅是一个。
#!/usr/bin/env python
from bs4 import BeautifulSoup
from urllib2 import urlopen
from datetime import datetime
import csv
import sys
import os
import smtplib
import config
# Craigslist search URL
SEARCH_URL = ['http://auburn.craigslist.org/search/sss?query={0}', 'http://bham.craigslist.org/search/sss?query={0}', 'http://dothan.craigslist.org/search/sss?query={0}', 'http://shoals.craigslist.org/search/sss?query={0}', 'http://gadsden.craigslist.org/search/sss?query={0}', 'http://huntsville.craigslist.org/search/sss?query={0}', 'http://auburn.craigslist.org/search/sss?query={0}', 'http://bham.craigslist.org/search/sss?query={0}', 'http://virgin.craigslist.org/search/sss?query={0}']
BASE_URL = "craigslist.org"
class Result:
""" Creates object from passed in HTML. Should be one row element from base search"""
def __init__(self, row):
self.url = row.find("a", "result-title")['href']
if row.find("span", "result-price"):
self.price = row.find("span", "result-price").get_text()
else:
self.price = "NA"
self.create_date = row.find('time').get('datetime')
self.title = row.find('a', 'result-title').get_text()
def print_out(self):
print "Title: " + self.title
print "Price: " + self.price
print "Creation Date: " + self.create_date
print "URL: " + self.url
print
def parse_results(search_term):
results = []
search_term = search_term.strip().replace(' ', '+')
search_url = [url.format(search_term) for url in SEARCH_URL]
soup = BeautifulSoup(urlopen(url).read())
rows = soup.find_all("li", "result-row")
for row in rows:
formattedResult = Result(row)
results.append(formattedResult)
return results
def record_results(results):
""" Writes URLs to file so we can keep track of what posts have been seen """
with open('results.csv', 'w') as f:
for x in results:
f.write(x.url)
f.write("\n")
def has_new_records(results):
current_posts = [x.url for x in results]
fields = ["url"]
if not os.path.exists('results.csv'):
return True
with open('results.csv', 'r') as f:
reader = csv.DictReader(f, fieldnames=fields, delimiter='|')
seen_posts = [row['url'] for row in reader]
is_new = False
for post in current_posts:
if post in seen_posts:
pass
else:
is_new = True
return is_new
def send_text(phone_number, msg):
fromaddr = "Craigslist Checker"
toaddrs = phone_number + "@att.net"
msg = ("From:{0}\r\nTo:{1}\r\nSubject:New Craigslist Result\r\n\r\n{2}").format(fromaddr, toaddrs, msg)
server = smtplib.SMTP('smtp.gmail.com:587')
server.starttls()
server.login(config.data['username'], config.data['password'])
server.sendmail(fromaddr, toaddrs, msg)
server.quit()
def get_current_time():
return datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S')
if __name__ == '__main__':
PHONE_NUMBER = config.data['phone']
SEARCH_TERM = sys.argv[1]
if len(PHONE_NUMBER) != 10:
print "Phone numbers must be 10 digits! Please change in config.\n"
sys.exit(1)
results = parse_results(SEARCH_TERM)
# Send an SMS message if there are new results. Only send first result to avoid spamming texts.
if has_new_records(results):
message = "Title: {0}\nPrice: {1}\nURL: {2}".format(results[0].title, results[0].price, results[0].url)
print "[{0}] There are new results - sending text message to {1}".format(get_current_time(), PHONE_NUMBER)
send_text(PHONE_NUMBER, message)
record_results(results)
else:
print "[{0}] No new results - will try again later".format(get_current_time())
如果有人可以修复此代码并详细解释为什么会失败,我将永远欠你的债
更新:它现在在results.csv
.
解决方案
你SEARCH_URL
不是一个字符串,它是一个字符串列表。因此,您必须单独转换它们。就像是
search_urls = [url.format(search_term) for url in SEARCH_URLS]
推荐阅读
- docker - 找不到 docker 的钥匙串访问权限
- objective-c - 无法解释此试飞崩溃报告
- c# - 从私钥创建以太坊公共地址
- java - 我正在尝试实施 Eratosthenes 筛,但它仅适用于小于 33 的数字
- c# - 在 VS Code 中调试 Powershell cmdlet(用 c# 编写)
- amazon-cloudformation - Cloudformation:如何将属性附加到预定义的存储桶?
- powershell - 将进程环境变量解析为 Windows 任务
- laravel - Laravel 中使用视图的动态元标记
- css - 我正在尝试使用 bootstrap4 对齐我的图像,但它不起作用
- c++ - 如何获取当前登录用户win32 api的用户名?