首页 > 解决方案 > python抓取json比较文件

问题描述

这是我的 python 代码,它搜索网页两次以获取产品详细信息并将数据保存在 .json 文件中。它应该检查新文件中的密钥是否更改并打印更改的内容,但我收到以下错误。

错误 :

 Traceback (most recent call last):
    File "x.py", line 84, in <module>
    compare()
    File "x.py", line 76, in compare
    for key in b.keys():
    AttributeError: 'NoneType' object has no attribute 'keys'

代码:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import cfscrape
import requests
from bs4 import BeautifulSoup as bs
import re
from pprint import pprint
import json

s = requests.Session()
s = cfscrape.create_scraper()

products = []
products1 = []

def x():
    r = s.get("https://www.oneblockdown.it/it/calzature-sneakers", headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"})
    soup = bs(r.content, "html.parser")

    js = [x.text for x in soup.find_all('script', {'type': 'text/javascript'}) if "var preloadedItems =" in x.text][0]
    js = js.replace('var preloadedItems = ', '')
    js = js[:js.find("}];")]+"}]".strip()
    data = json.loads(js)
    for product in data:
        product_id = product["id"]
        product_title = product["title"]
        product_link = product["permalink"]
        product_price = product["displayPrice"]
        product_available = product["isAvailable"]
        product_size = product["attributes"]
        products.append({
            "product_id": product_id,
            "product_title": product_title,
            "product_link": product_link,
            "product_price": product_price,
            "product_available": product_available,
            "product_size": product_size
        })

    with open('data.json', 'w') as f:
        json.dump(products, f, indent = 4)
        f.close()

def y():
    r1 = s.get("https://www.oneblockdown.it/it/calzature-sneakers", 
    headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"})
    soup = bs(r1.content, "html.parser")

    js = [x.text for x in soup.find_all('script', {'type': 'text/javascript'}) if "var preloadedItems =" in x.text][0]
    js = js.replace('var preloadedItems = ', '')
    js = js[:js.find("}];")]+"}]".strip()
    data1 = json.loads(js)
    for product in data1:
        product_id = product["id"]
        product_title = product["title"]
        product_link = product["permalink"]
        product_price = product["displayPrice"]
        product_available = product["isAvailable"]
        product_size = product["attributes"]
        products1.append({
            "product_id": product_id,
            "product_title": product_title,
            "product_link": product_link,
            "product_price": product_price,
            "product_available": product_available,
            "product_size": product_size
        })

    with open('data1.json', 'w') as f:
        json.dump(products, f, indent = 4)
        f.close()


def compare():
    while True:
        a = x()
        b = y()
        for key in b.keys():
            value = b[key]
            if key not in a:
                print(key, value)
            else:
                if a[key] != value:
                    print("for key {} values are different".format(key))

compare()

我选择了这种方法,但我不知道是否有更好的方法用于此目的。

标签: pythonjsonweb-scraping

解决方案


你没有从x()y()方法返回任何东西。因此,ab属于 类型None

您很可能希望从and中删除products列表,因此在方法中添加 return 语句。x()y()

喜欢:

return products


推荐阅读