python - 如何使用 Python 正确创建 JSON 文件?
问题描述
我正在尝试使用来自 Yahoo 的财务数据创建一个 JSON 文件。我可以解析网站上的所有数据。但不幸的是,我找不到正确创建 JSON 文件的方法。似乎 JSON 对象没有以正确的方式连接。
有谁知道如何解决这个问题?
请在下面找到代码:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Scrapes summary statistics (ticker, price, volume, ratios, dates) from a
# fixed list of Yahoo Finance quote pages and dumps each result as JSON.
# NOTE(review): this is a Python 2 script (urllib2, print statements).
import urllib2
from bs4 import BeautifulSoup
import ssl
import json
import ast
import os
from urllib2 import Request, urlopen
import datetime
# For ignoring SSL certificate errors
# NOTE(review): `ctx` is configured to skip certificate verification but is
# never passed to urlopen() below, so it currently has no effect.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
# Input from the user (temp. manually)
urls = [
'https://finance.yahoo.com/quote/DB1.DE?p=DB1.DE'
, 'https://finance.yahoo.com/quote/ALV.DE?p=ALV.DE&.tsrc=fin-srch'
, 'https://finance.yahoo.com/quote/FME.DE?p=FME.DE'
, 'https://finance.yahoo.com/quote/DBK.DE?p=DBK.DE'
]
# Making the website believe that you are accessing it using a Mozilla browser
for url in urls:
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
# Creating a BeautifulSoup object of the HTML page for easy extraction of data.
soup = BeautifulSoup(webpage, 'html.parser')
# NOTE(review): `html` is never used afterwards; the same is true of the
# `ast` and `os` imports above.
html = soup.prettify('utf-8')
# One flat dict of KPI-name -> scraped text per URL.
world_ana_json = {}
for h1 in soup.findAll('h1'):
world_ana_json['TICKER'] = h1.text.strip()
# NOTE(review): matching on Yahoo's generated utility-CSS class string is
# fragile — it breaks whenever the site re-styles the quote header.
for span in soup.findAll('span',attrs={'class': 'Trsdu(0.3s) Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(b)'}):
world_ana_json['PRESENT_VALUE'] = span.text.strip()
for div in soup.findAll('div', attrs={'class': 'D(ib) Va(t)'}):
for span in div.findAll('span', recursive=False):
world_ana_json['PRESENT_GROWTH'] = span.text.strip()
# The following loops all share one shape: find <td data-test="KEY-value">,
# take the text of its direct child <span>.
for td in soup.findAll('td', attrs={'data-test': 'PREV_CLOSE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['PREV_CLOSE'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'OPEN-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['OPEN'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'BID-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['BID'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'ASK-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['ASK'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'DAYS_RANGE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['DAYS_RANGE'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'FIFTY_TWO_WK_RANGE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['FIFTY_TWO_WK_RANGE'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'TD_VOLUME-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['TD_VOLUME'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'AVERAGE_VOLUME_3MONTH-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['AVERAGE_VOLUME_3MONTH'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'MARKET_CAP-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['MARKET_CAP'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'BETA_3Y-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['BETA_3Y'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'PE_RATIO-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['PE_RATIO'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'EPS_RATIO-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['EPS_RATIO'] = span.text.strip()
# EARNINGS_DATE can hold a date range, hence a list of span texts.
for td in soup.findAll('td', attrs={'data-test': 'EARNINGS_DATE-value'}):
world_ana_json['EARNINGS_DATE'] = []
for span in td.findAll('span', recursive=False):
world_ana_json['EARNINGS_DATE'].append(span.text.strip())
for td in soup.findAll('td',attrs={'data-test': 'DIVIDEND_AND_YIELD-value'}):
world_ana_json['DIVIDEND_AND_YIELD'] = td.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'EX_DIVIDEND_DATE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['EX_DIVIDEND_DATE'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'ONE_YEAR_TARGET_PRICE-value' }):
for span in td.findAll('span', recursive=False):
world_ana_json['ONE_YEAR_TARGET_PRICE'] = span.text.strip()
world_ana_json['PARSE_DATE'] = str(datetime.datetime.now())
# BUG(review): this dump runs once per URL, in append mode, writing several
# independent top-level JSON objects back to back ("}{" in the output) —
# the file is therefore NOT valid JSON. Collect the dicts in a list inside
# the loop and json.dump() that list ONCE after the loop instead; this is
# exactly the defect the question is about.
with open('world_ana_kpis.json', 'a') as outfile:
json.dump(world_ana_json, outfile, indent=4)
print world_ana_json
print '----------Iteration complete.----------'
print '----------Extraction of data is complete. Check json file.----------'
这是 JSON 结果:
{
"TD_VOLUME": "684,446",
"MARKET_CAP": "24.882B",
"OPEN": "134.05",
"BID": "135.15 x 3500",
"AVERAGE_VOLUME_3MONTH": "849,271",
"PREV_CLOSE": "132.65",
"EARNINGS_DATE": [
"Feb 13, 2018",
"Feb 19, 2018"
],
"DIVIDEND_AND_YIELD": "2.90 (2.19%)",
"EPS_RATIO": "5.47",
"PARSE_DATE": "2020-04-14 22:06:45.339815",
"PE_RATIO": "24.80",
"ASK": "135.10 x 14200",
"PRESENT_VALUE": "135.65",
"TICKER": "DB1.DE - Deutsche Borse AG",
"EX_DIVIDEND_DATE": "May 20, 2020",
"ONE_YEAR_TARGET_PRICE": "119.75"
}{
"TD_VOLUME": "1,841,919",
"MARKET_CAP": "69.86B",
"OPEN": "168.72",
"BID": "167.66 x 214900",
"AVERAGE_VOLUME_3MONTH": "2,259,476",
"PREV_CLOSE": "164.88",
"EARNINGS_DATE": [
"May 12, 2020"
],
"DIVIDEND_AND_YIELD": "9.60 (5.82%)",
"EPS_RATIO": "18.83",
"PARSE_DATE": "2020-04-14 22:06:46.592483",
"PE_RATIO": "8.91",
"ASK": "167.74 x 10000",
"PRESENT_VALUE": "167.70",
"TICKER": "ALV.DE - Allianz SE",
"EX_DIVIDEND_DATE": "May 07, 2020",
"ONE_YEAR_TARGET_PRICE": "213.69"
}{
"TD_VOLUME": "912,856",
"MARKET_CAP": "19.246B",
"OPEN": "64.26",
"BID": "63.98 x 38700",
"AVERAGE_VOLUME_3MONTH": "1,293,817",
"PREV_CLOSE": "63.24",
"EARNINGS_DATE": [
"May 06, 2020"
],
"DIVIDEND_AND_YIELD": "1.20 (1.90%)",
"EPS_RATIO": "3.96",
"PARSE_DATE": "2020-04-14 22:06:48.157705",
"PE_RATIO": "16.18",
"ASK": "64.04 x 105300",
"PRESENT_VALUE": "64.06",
"TICKER": "FME.DE - Fresenius Medical Care AG & Co. KGaA",
"EX_DIVIDEND_DATE": "May 20, 2020",
"ONE_YEAR_TARGET_PRICE": "80.61"
}{
"TD_VOLUME": "15,877,455",
"MARKET_CAP": "12.828B",
"OPEN": "6.20",
"BID": "6.19 x 555100",
"AVERAGE_VOLUME_3MONTH": "25,845,616",
"PREV_CLOSE": "6.12",
"EARNINGS_DATE": [
"Apr 29, 2020"
],
"DIVIDEND_AND_YIELD": "0.11 (1.82%)",
"EPS_RATIO": "-2.71",
"PARSE_DATE": "2020-04-14 22:06:49.821249",
"PE_RATIO": "N/A",
"ASK": "6.19 x 230000",
"PRESENT_VALUE": "6.21",
"TICKER": "DBK.DE - Deutsche Bank Aktiengesellschaft",
"EX_DIVIDEND_DATE": "May 24, 2019",
"ONE_YEAR_TARGET_PRICE": "N/A"
}
解决方案
问题的根源在于您在 for 循环内部以追加模式('a')写入文件:每次迭代都会向同一个文件再写入一个独立的顶层 JSON 对象,对象之间既没有逗号也没有外层数组(输出中出现 "}{"),因此整个文件不是合法的 JSON。正确的做法是:在循环中把每个结果字典收集到一个列表里,循环结束后把整个列表一次性 dump 成一个 JSON 数组。另外,如果脚本会重复运行,应使用写入模式('w')而不是追加模式('a'),否则上次运行的内容会残留在文件里,再次产生多个顶层 JSON 值。
我已经按此思路修改了代码。
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2
from bs4 import BeautifulSoup
import ssl
import json
import ast
import os
from urllib2 import Request, urlopen
import datetime
# For ignoring SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
# Input from the user (temp. manually)
urls = [
'https://finance.yahoo.com/quote/DB1.DE?p=DB1.DE'
, 'https://finance.yahoo.com/quote/ALV.DE?p=ALV.DE&.tsrc=fin-srch'
, 'https://finance.yahoo.com/quote/FME.DE?p=FME.DE'
, 'https://finance.yahoo.com/quote/DBK.DE?p=DBK.DE'
]
output = []
# Making the website believe that you are accessing it using a Mozilla browser
for url in urls:
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
# Creating a BeautifulSoup object of the HTML page for easy extraction of data.
soup = BeautifulSoup(webpage, 'html.parser')
html = soup.prettify('utf-8')
world_ana_json = {}
for h1 in soup.findAll('h1'):
world_ana_json['TICKER'] = h1.text.strip()
for span in soup.findAll('span',attrs={'class': 'Trsdu(0.3s) Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(b)'}):
world_ana_json['PRESENT_VALUE'] = span.text.strip()
for div in soup.findAll('div', attrs={'class': 'D(ib) Va(t)'}):
for span in div.findAll('span', recursive=False):
world_ana_json['PRESENT_GROWTH'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'PREV_CLOSE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['PREV_CLOSE'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'OPEN-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['OPEN'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'BID-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['BID'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'ASK-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['ASK'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'DAYS_RANGE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['DAYS_RANGE'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'FIFTY_TWO_WK_RANGE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['FIFTY_TWO_WK_RANGE'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'TD_VOLUME-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['TD_VOLUME'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'AVERAGE_VOLUME_3MONTH-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['AVERAGE_VOLUME_3MONTH'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'MARKET_CAP-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['MARKET_CAP'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'BETA_3Y-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['BETA_3Y'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'PE_RATIO-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['PE_RATIO'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'EPS_RATIO-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['EPS_RATIO'] = span.text.strip()
for td in soup.findAll('td', attrs={'data-test': 'EARNINGS_DATE-value'}):
world_ana_json['EARNINGS_DATE'] = []
for span in td.findAll('span', recursive=False):
world_ana_json['EARNINGS_DATE'].append(span.text.strip())
for td in soup.findAll('td',attrs={'data-test': 'DIVIDEND_AND_YIELD-value'}):
world_ana_json['DIVIDEND_AND_YIELD'] = td.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'EX_DIVIDEND_DATE-value'}):
for span in td.findAll('span', recursive=False):
world_ana_json['EX_DIVIDEND_DATE'] = span.text.strip()
for td in soup.findAll('td',attrs={'data-test': 'ONE_YEAR_TARGET_PRICE-value' }):
for span in td.findAll('span', recursive=False):
world_ana_json['ONE_YEAR_TARGET_PRICE'] = span.text.strip()
world_ana_json['PARSE_DATE'] = str(datetime.datetime.now())
output.append(world_ana_json);
with open('world_ana_kpis.json', 'a') as outfile:
json.dump(output, outfile, indent=4)
print '----------Extraction of data is complete. Check json file.----------'
推荐阅读
- angular - Angular中的DomSanitizer不适用于Transform css属性
- ruby - 如何在 ruby 中转置哈希数组
- python - 如何避免从网站网页抓取中创建空 csv?
- r - 使用 R 进行线性优化
- azure-devops - 如何访问 Azure DevOpps 组织
- camera - 如何从相机固有矩阵计算 KITTI 相机的水平和垂直 FOV?
- graphql - 使用 GraphQL,是否可以通过数据类型为 Int 的属性进行 SQL 注入?
- php - 无法从数据库中的另一个表中获取值
- python - PDF文档:如何使用python验证数字签名?
- r - 将 `RasterLayer` 添加到传单中的 `addLayersControl`