首页 > 解决方案 > 请求 Python,如何美化这段代码并做些什么?

问题描述

所以问题是我不明白如何美化这段代码,请帮帮我代码通过这个网站的链接树,我需要保存那里提供的所有文件。但我被困在过渡上。我不知道如何在没有循环的情况下通过循环

import requests
from bs4 import BeautifulSoup

links = 'https://daten.gdz.bkg.bund.de/produkte/'
info = []


def get_hrefs():
    global link5
    r = requests.get(links)

    # print(r)
    soup = BeautifulSoup(r.content, 'html.parser')
    for linkse in soup.find_all('a'):
        link = linkse.get('href')
        # return links+link
        # print(links + link) take all links of href
        print('______________________')
        if '?C=' in link:
            pass
        else:
            r2 = requests.get(links + link)
            soup = BeautifulSoup(r2.content, 'html.parser')
            for linkse in soup.find_all('a'):
                link2 = linkse.get('href')
                # return links+link
                # print(links + link + link2)
                if '?C=' in link2:
                    pass
                else:
                    r3 = requests.get(links + link + link2)
                    soup = BeautifulSoup(r3.content, 'html.parser')
                    for linkse in soup.find_all('a'):
                        link3 = linkse.get('href')
                        # return links+link
                        # print(links + link + link2+link3)
                        if '?C=' in link3:
                            pass
                        else:

                            r4 = requests.get(links + link + link2 + link3)
                            soup = BeautifulSoup(r4.content, 'html.parser')
                            for linkse in soup.find_all('a'):
                                link4 = linkse.get('href')
                                # return links+link
                                print(links + link + link2 + link3 + link4)
                                if '?C=' in link4:
                                    pass
                                else:
                                    try:
                                        r5 = requests.get(links + link + link2 + link3 + link4)
                                        soup = BeautifulSoup(r4.content, 'html.parser')
                                        for linkse in soup.find_all('a'):
                                            link5 = linkse.get('href')
                                            # return links+link
                                            print(links + link + link2 + link3 + link4 + link5)
                                            # info = links + link + link2 + link3 + link4 + link5

                                    except:
                                        info.append(links + link + link2 + link3 + link4 + link5)

        print('--------------------INFO')
        print(info)
        print("________________END")
def main():
    link = get_hrefs()
    # link2 = get_another_link(link)


if __name__ == '__main__':
    main()

标签: python-3.xpython-requests

解决方案


我相信您正在尝试遍历链接层次结构并在最后找出某些文件(zip)的最终 url。
即使不是这种情况,您应该做的是Move repeating code into a function并使用它recursion来避免任何循环。

以下示例代码打印出最终的 zip 文件链接

import requests
from bs4 import BeautifulSoup

links = 'https://daten.gdz.bkg.bund.de/produkte/'
info = []

def get_hrefs(rootlink):
    r = requests.get(rootlink)
    soup = BeautifulSoup(r.content, 'html.parser')
    for linkse in soup.find_all('a'):
        link = linkse.get('href')
        if '?C=' in link:
            pass
        elif link.endswith('.zip'):
            print("Final link ", rootlink + link)
            return
        else:
          get_hrefs(rootlink + link)
          
if __name__ == '__main__':
    get_hrefs(links)

推荐阅读