python-3.x - 网络抓取 - 我如何获得网页中所有可用帖子的价格
问题描述
这段代码是别人提供给我的,它会返回关于某一篇帖子的大量信息。我想用它从这个 url 获取同样的信息,并增加该页面返回的帖子数量——目前只有 20 条。
import requests
# https://haraj.com.sa/1179070147
def main(url, post_id=79070147, timeout=30):
    """Fetch one post from the GraphQL endpoint and print the decoded JSON.

    Args:
        url: Endpoint that receives the POST request.
        post_id: Integer sent as the single element of the ``ids`` query
            variable. Defaults to the id that was hard-coded in the
            original snippet.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block forever on an unresponsive host.
    """
    # Query-string parameters are sent exactly as given in the original
    # snippet. NOTE(review): presumably captured from the site's web
    # client; confirm they are still accepted by the server.
    params = {
        'queryName': 'detailsPosts_singlePost',
        'token': '',
        'clientid': '812f41b2-9936-4405-aa9c-378db19b8cc4',
        'version': '8.2.9 , 10 18 - 7 - 21'
    }
    # GraphQL document plus its variables; the query text is unchanged.
    data = {
        "query": "query($ids:[Int]) { posts( id:$ids) {\n\t\titems {\n\t\t\tid status authorUsername title city postDate updateDate hasImage thumbURL authorId bodyHTML bodyTEXT city tags imagesList commentStatus commentCount upRank downRank geoHash\n\t\t}\n\t\tpageInfo {\n\t\t\thasNextPage\n\t\t}\n\t\t} }",
        "variables": {
            "ids": [
                post_id
            ]
        }
    }
    r = requests.post(url, params=params, json=data, timeout=timeout)
    print(r.json())


# Run the request only when executed as a script, not on import.
if __name__ == "__main__":
    main('https://graphql.haraj.com.sa/')
如能提供任何帮助,不胜感激。
解决方案
遍历页面以获得所需的信息。
请注意,您可以直接从 JSON 响应中获取所有信息,而无需再次调用 API。
import requests
from pprint import pp
# Browser-like User-Agent attached to every request made by the session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def main(url, pages=2, tag="حراج العقار", timeout=30):
    """Request consecutive result pages for a tag and pretty-print each post.

    Pages ``1`` through ``pages`` are requested in order over a single
    ``requests.Session``. Every element of ``data.posts.items`` in each
    response is printed with ``pprint.pp``.

    Args:
        url: Endpoint that receives the POST requests.
        pages: Number of pages to request, starting at page 1. The default
            of 2 matches the original hard-coded ``range(1, 3)``; raise it
            to collect more posts.
        tag: Value sent as the ``tag`` query variable. Defaults to the tag
            hard-coded in the original snippet.
        timeout: Seconds to wait for each response. Without a timeout
            ``requests`` may block forever on an unresponsive host.

    Raises:
        SystemExit: when a response status code is not 200. The message is
            written to stderr and the process exits with status 1.
    """
    with requests.Session() as req:
        req.headers.update(headers)
        for page in range(1, pages + 1):
            # Query-string parameters are sent as in the original snippet;
            # only the page number inside ``queryName`` varies.
            params = {
                'queryName': 'detailsPosts_tag_page{}'.format(page),
                'token': '',
                'clientid': '812f41b2-9936-4405-aa9c-378db19b8cc4',
                'version': '8.2.9 , 10 18 - 7 - 21'
            }
            # GraphQL document plus its variables; the query text is unchanged.
            data = {
                "query": "query($tag:String,$page:Int) { posts( tag:$tag, page:$page) {\n\t\titems {\n\t\t\tid status authorUsername title city postDate updateDate hasImage thumbURL authorId bodyHTML bodyTEXT city tags imagesList commentStatus commentCount upRank downRank geoHash geoCity geoNeighborhood\n\t\t}\n\t\tpageInfo {\n\t\t\thasNextPage\n\t\t}\n\t\t} }",
                "variables": {
                    "page": page,
                    "tag": tag
                }
            }
            r = req.post(url, params=params, json=data, timeout=timeout)
            if r.status_code != 200:
                # ``exit()`` is injected by the ``site`` module for
                # interactive use and may be missing (e.g. ``python -S``);
                # raising SystemExit is the equivalent that always works.
                raise SystemExit(f"Page# {page} is not exist, However program stopped.")
            for i in r.json()['data']['posts']['items']:
                pp(i)
                # Inspect i.keys() to see which fields each post carries.


# Run the scrape only when executed as a script, not on import.
if __name__ == "__main__":
    main('https://graphql.haraj.com.sa/')