首页 > 解决方案 > 如何在功能中应用多进程池 apply_async?

问题描述

我有一个计算 url 的混合内容数据的函数,我想使用多进程运行它,但每次我都没有得到任何结果。谁能告诉我在这里做错了什么?

通常它单独运行得很好,但我想检查使用多进程后整体执行时间是否有区别。在实际应用中它需要处理 1000 多个 url。

from bs4 import BeautifulSoup
import requests
import multiprocessing as mp

def MixedContentCheck(url: str, headers=None,
                      redirect_codes=(301, 302, 303, 307, 308)):
    """Check a URL for HTTP/HTTPS availability and mixed (insecure) content.

    Fetches the ``http://`` and ``https://`` variants of *url* without
    following redirects.  When the HTTPS page answers 200, parses it and
    collects every ``<link href>``, ``<script src>`` and ``<a href>`` that
    still points at plain ``http://``.

    Parameters:
        url: full URL including a scheme, e.g. "https://example.com/page".
        headers: optional dict of HTTP request headers.  The original code
            referenced an undefined global ``HEADERS``, which made every
            call raise NameError (silently swallowed by the bare except);
            ``None`` sends requests' default headers.
        redirect_codes: HTTPS status codes treated as a redirect.  Replaces
            the undefined global ``redirect_code`` of the original.

    Returns:
        ``{'status': True, 'url': url, 'data': {...}}`` on success, or
        ``{'status': False, 'url': url, 'message': ..., 'url_status_code': 404}``
        when the URL is malformed or the site is unreachable.
    """
    try:
        data_set = {"http_status": 0, "https_status": 0,
                    "insecure_link_count": 0, "insecure_links": []}
        insecure_links = []
        # Strip the scheme; raises IndexError (caught below) when the URL
        # contains no "://" at all.
        host_and_path = url.split('://')[1]
        http_response = requests.get('http://' + host_and_path,
                                     headers=headers, allow_redirects=False)
        data_set['http_status'] = http_response.status_code
        data_set['http_redirect'] = http_response.is_redirect
        https_response = requests.get('https://' + host_and_path,
                                      headers=headers, allow_redirects=False)
        data_set['https_status'] = https_response.status_code
        data_set['redirect_loop'] = (
            "Present" if https_response.status_code in redirect_codes
            else "Absent")
        # "Mixed content" here means the site serves 200 on both schemes.
        data_set['mixed_content'] = (
            "Present" if (https_response.status_code == 200
                          and http_response.status_code == 200)
            else "Absent")
        if https_response.status_code == 200:
            soup = BeautifulSoup(https_response.content, 'lxml')
            # One pass per (tag, attribute) pair replaces the three
            # copy-pasted loops of the original; Tag.get() returns None for
            # a missing attribute, so no try/except-pass is needed.
            for tag_name, attr in (("link", "href"),
                                   ("script", "src"),
                                   ("a", "href")):
                for node in soup.find_all(tag_name):
                    ref = node.get(attr)
                    if ref and ref.startswith('http://'):
                        insecure_links.append(ref)
        data_set['insecure_link_count'] = len(insecure_links)
        data_set['insecure_links'] = insecure_links
        return {'status': True, 'url': url, 'data': data_set}
    except Exception:
        # Malformed URL (IndexError from the split) or any network failure.
        # Kept broad deliberately: callers rely on always getting a dict,
        # never an exception, out of a pool worker.
        return {'status': False, 'url': url,
                'message': 'URL is Invalid!!! OR Website Not Found',
                'url_status_code': 404}

# Sample URLs to scan; a real deployment is expected to feed 1000+ of these.
urls=["https://shop.heavyglare.com",
"https://shop.heavyglare.com/contact",
"https://shop.heavyglare.com/deals",
"https://shop.heavyglare.com/promo",
"https://shop.heavyglare.com/blog",
"https://shop.heavyglare.com/customer/account/login",
"https://shop.heavyglare.com/catalogsearch/advanced",
"https://shop.heavyglare.com/wishlist",
"https://shop.heavyglare.com/checkout/cart",
"https://shop.heavyglare.com/accessories",
"https://www.jockey.in/women/collection/relax",
"https://www.jockey.in/men/accessories"]

# Accumulates one entry per finished pool task, in completion order.
result_list = []


def log_result(result):
    """Pool callback: stash one worker's result.

    apply_async invokes this in the parent process each time a worker
    finishes, so mutating the module-level list here needs no locking
    on our side.
    """
    result_list.extend([result])


def apply_async_with_callback():
    """Fan the URL checks out over a process pool and print collected results.

    Fixes two defects of the original:
      * it called ``async_result.wait()`` inside the submit loop, blocking
        until each task finished before submitting the next one — which
        serialised the work and threw away every benefit of the pool;
      * worker-side exceptions vanished silently (no ``error_callback``),
        which is the classic reason a callback-filled list comes back empty.

    Returns the shared ``result_list`` (in addition to printing it) so
    callers can use the data; the original implicitly returned None, so
    this is a backward-compatible addition.
    """
    pool = mp.Pool()
    try:
        # Submit everything first; pool.join() below only returns after
        # every task AND its callback have completed.
        for url in urls:
            pool.apply_async(
                MixedContentCheck,
                args=(url,),
                callback=log_result,
                error_callback=lambda exc: print('worker failed:', exc),
            )
    finally:
        pool.close()
        pool.join()
    print(result_list)
    return result_list



if __name__ == '__main__':
    # Mandatory with multiprocessing: child processes re-import this module,
    # and without the guard each import would spawn another pool recursively.
    apply_async_with_callback()

我每次都得到空列表,但是以普通方式(不用多进程)调用该函数时它工作正常。请帮助。

标签: pythonpython-3.x

解决方案


推荐阅读