首页 > 解决方案 > 抓取 BBB 站点,将 JSON 转换为 DataFrame

问题描述

我想把这些信息放入 DataFrame,然后导出到 Excel。到目前为止,我按照 Python 教程操作时总是出现表格(table)相关的错误,也一直没能把 JSON 数据转换成 DataFrame。任何提示都将非常有帮助。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from urllib.request import urlopen
import bs4

import requests, re, json

# Send a browser-like User-Agent with the search request.
headers = {'User-Agent':'Mozilla/5.0'}
search_url = 'https://www.bbb.org/search?find_country=USA&find_entity=10126-000&find_id=357_10126-000_alias&find_text=roofing&find_type=Category&page=1&touched=1'
r = requests.get(search_url, headers=headers)

# Capture the text between "PRELOADED_STATE__ = " and the first ';' that
# follows it, then parse the first capture as JSON.
p = re.compile(r'PRELOADED_STATE__ = (.*?);')
captured = p.findall(r.text)
data = json.loads(captured[0])


def to_row(item):
    """Return a (business name, space-joined address, phone) tuple for one result."""
    name = item['businessName']
    address_parts = [item['address'], item['city'], item['state'], item['postalcode']]
    return (name, ' '.join(address_parts), item['phone'])


results = [to_row(entry) for entry in data['searchResult']['results']]
print(results)

标签: python json web screen-scraping

解决方案


import re
import json
import requests
import pandas as pd
from bs4 import BeautifulSoup


headers = {'User-Agent':'Mozilla/5.0'}
r = requests.get('https://www.bbb.org/search?find_country=USA&find_entity=10126-000&find_id=357_10126-000_alias&find_text=roofing&find_type=Category&page=1&touched=1', headers = headers)
p = re.compile(r'PRELOADED_STATE__ = (.*?);')
data = json.loads(p.findall(r.text)[0])
results = [(item['businessName'], ' '.join([item['address'],item['city'], item['state'], item['postalcode']]), item['phone']) for item in data['searchResult']['results']]

df = pd.DataFrame(results, columns=['Business Name', 'Address', 'Phone'])
print(df)
df.to_csv('data.csv')

输出:

                                     Business Name                                            Address                                             Phone
0                             Trinity Roofing, LLC                             Stilwell KS 66085-8238                  [(913) 432-4425, (303) 699-7999]
1                             Trinity Roofing, LLC     14241 E 4th Ave Ste 5-300 Aurora CO 80011-8733                  [(913) 432-4425, (303) 699-7999]
2         CMR Construction & Roofing of Texas, LLC  12500 E US Highway 40, Ste. B1 Independence MO...                  [(855) 376-6326, (855) 766-3267]
3                        All-Star Home Repairs LLC              1806 Grove Ave Richmond VA 23220-4506                                  [(804) 405-9337]
4                MadSky Roofing & Restoration, LLC  Bank of America Center, 16th Floor 1111 E. Mai...                                  [(855) 623-7597]
5                             Robert Owens Roofing                             Bealeton VA 22712-9706                                  [(540) 878-3544]
6                             Proof Seal of Athens                   PO Box 80732 Canton OH 447080732                                  [(330) 685-6363]
7                             Proof Seal of Athens                               Athens OH 45701-1847                                  [(330) 685-6363]
8                   Tenecela General Services Corp                57 Anderson St Lowell MA 01852-5357                                              None
9                     Water Tight Roofing & Siding             57 Whitehall Way Hyannis MA 02601-2149                                  [(508) 364-8323]
10                  Tenecela General Services Corp          745 Broadway St Fl 2 Lowell MA 01854-3137                                              None
11         Just In Time Roofing & Contracting, LLC                            ----- Ft Worth TX 76102  [(888) 666-3122, (254) 296-8016, (888) 370-3331]
12  Paramount Construction of Southerntier NY Inc.               323 Fluvanna Ave. Jamestown NY 14701                                  [(716) 487-0093]
13  Paramount Construction of Southerntier NY Inc.                      P O Box 488 Falconer NY 14733                                  [(716) 487-0093]
14  Paramount Construction of Southerntier NY Inc.            1879 Lyndon Boulevard Falconer NY 14733                                  [(716) 487-0093]

并将结果保存为 data.csv(以下是在 LibreOffice 中打开该文件的屏幕截图):

在此处输入图像描述


推荐阅读