python-3.x - 请求 Python,如何美化这段代码并做些什么?
问题描述
所以问题是我不知道如何美化这段代码，请帮帮我。代码需要遍历这个网站的链接树，把那里提供的所有文件都保存下来。但我卡在层级之间的过渡上：我不知道如何在不写一层套一层的嵌套循环的情况下完成遍历。
import requests
from bs4 import BeautifulSoup
links = 'https://daten.gdz.bkg.bund.de/produkte/'
info = []
def get_hrefs(base_url=links, depth=5):
    """Recursively walk the Apache directory listing at *base_url*.

    Prints every URL discovered, descending at most *depth* levels (the
    original copy-pasted code went exactly five levels deep).  URLs that
    fail to download are collected in the module-level ``info`` list,
    mirroring the original's ``except`` branch.

    Backward compatible: ``get_hrefs()`` with no arguments starts from the
    module-level ``links`` root, exactly like the original.

    Fixes over the original:
    - the fifth level parsed ``r4.content`` instead of ``r5.content``;
    - the bare ``except:`` appended a possibly-unset ``global link5``;
    - five duplicated nested loops are replaced by one recursive body.
    """
    if depth <= 0:
        return
    try:
        r = requests.get(base_url)
        soup = BeautifulSoup(r.content, 'html.parser')
    except requests.RequestException:
        # Remember URLs we could not fetch (original appended to `info`
        # inside its bare except).
        info.append(base_url)
        return
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # Skip anchors without href and Apache-index column-sort links
        # (those contain '?C='), as the original did at every level.
        if href is None or '?C=' in href:
            continue
        print(base_url + href)
        get_hrefs(base_url + href, depth - 1)
def main():
    """Entry point: crawl the link tree starting from the configured root."""
    # get_hrefs() returns None; the original bound it to an unused name.
    get_hrefs()


if __name__ == '__main__':
    main()
解决方案
我相信您正在尝试遍历链接层次结构并在最后找出某些文件(zip)的最终 url。
即使不是这种情况，您也应该把重复的代码移到一个函数中（move repeating code into a function），并使用递归（recursion）来避免层层嵌套的循环。
以下示例代码打印出最终的 zip 文件链接
import requests
from bs4 import BeautifulSoup
links = 'https://daten.gdz.bkg.bund.de/produkte/'
info = []
def get_hrefs(rootlink):
    """Depth-first walk of the directory listing rooted at *rootlink*.

    For each anchor on the page:
    - hrefs containing '?C=' (Apache column-sort links) are ignored;
    - the first href ending in '.zip' is printed as the final link and the
      function returns immediately (stopping at that directory level);
    - any other href is treated as a sub-directory and descended into.
    """
    page = requests.get(rootlink)
    parsed = BeautifulSoup(page.content, 'html.parser')
    for anchor in parsed.find_all('a'):
        href = anchor.get('href')
        if '?C=' in href:
            continue  # ignore sort-order links in the index page
        if href.endswith('.zip'):
            print("Final link ", rootlink + href)
            return
        get_hrefs(rootlink + href)


if __name__ == '__main__':
    get_hrefs(links)
推荐阅读
- javascript - 通过缩小 graphql 查询,我是否会遇到问题
- django - How to change permission of django project?
- c# - 无法发布活动。取消授权。机器人模拟器错误
- scikit-learn - 如何正确缩放新数据点 sklearn
- reporting-services - SSRS - 参数中传递的多个值的多个 tablix
- json - 使用 UISearchBar 文本搜索 UITableView 单元格
- tcl - 需要帮助消除我的代码中的竞争条件
- phpmyadmin - 如何删除 XAMPP 但保留其模块如 mysql?
- c++ - 具有特定属性的范围内的最小数量
- ruby-on-rails - puts + .inspect 不显示 has_many 属性