python - 使用 Python 进行 Web Scraping - 代码在 ~10000 行处停止,没有返回预期的输出大小
问题描述
我用 Python 编写了下面的代码,用于从网站上抓取价格表。根据返回的“总结果数”(totalResults),应该有超过 100k 个零件可用,但实际输出只返回了约 10k 条。想知道可能是什么原因造成的,非常感谢任何帮助!
import pandas as pd
import requests
# Endpoint and paging limits, kept in one place instead of repeated literals.
SEARCH_URL = 'https://prodasf-vip.partsfinder.com/Orion/CatalogService/api/v1/search'
OUTPUT_PATH = r'C:\Users\212677036\Documents\output_final.csv'
PAGE_SIZE = 200
MAX_ROWS = 200000
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever

query = 'GE Healthcare'
payload = {
    "facets": [],
    "facilityId": 38451,
    "id_ins": "a2a3d332-73a7-4194-ad87-fe7412388916",
    "limit": PAGE_SIZE,
    "query": query,
    "referer": "/catalog/Service",
    "start": 0,
    "urlParams": [],
}


def fetch_products(post, payload, max_rows=MAX_ROWS):
    """Download search results page by page and return them as one DataFrame.

    Args:
        post: Callable with the calling convention of ``requests.post``
            (``post(url, json=..., timeout=...)``) returning an object that
            exposes ``status_code`` and ``json()``.
        payload: Search request body. It must contain ``"start"`` and
            ``"limit"``; it is copied, so the caller's dict is not modified.
        max_rows: Safety cap on the number of rows to collect.

    Returns:
        ``(df, total)`` where ``df`` holds every product row received and
        ``total`` is the ``totalResults`` value reported by the first page.
        ``(None, None)`` when the very first request does not return HTTP 200.
    """
    request = dict(payload)
    page_size = request["limit"]
    frames = []
    fetched = 0
    total = None

    while True:
        response = post(SEARCH_URL, json=request, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            break
        body = response.json()
        if total is None:
            # Only the first page's total is used as the target.
            total = body["totalResults"]
        products = body["products"]
        if not products:
            # An empty page means the server has nothing more to hand out.
            # Without this check the loop would spin forever, because the
            # row count stops growing while it is still below the total.
            # NOTE(review): the server may cap deep paging well below
            # totalResults (many search backends refuse large offsets) --
            # confirm against the API if fewer rows than expected come back.
            break
        frame = pd.json_normalize(products)
        frames.append(frame)
        fetched += len(frame)
        if fetched >= total or fetched >= max_rows:
            break
        request["start"] += page_size

    if total is None:
        return None, None
    # Concatenate once at the end (concat inside the loop re-copies all rows
    # on every page) and renumber the rows so the index is unique.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return df, total


def main():
    """Fetch all products for the configured query and write them to CSV."""
    df, total = fetch_products(requests.post, payload)
    if df is None:
        print("no data: the first request did not return HTTP 200")
        return
    print(f"want: {total} got: {len(df)}")
    df.to_csv(OUTPUT_PATH)


if __name__ == "__main__":
    main()