python - 如何在函数中使用多进程池的 apply_async?
问题描述
我有一个计算 url 的混合内容数据的函数,我想使用多进程运行它,但每次我都没有得到任何结果。谁能告诉我在这里做错了什么?
不使用多进程、直接调用时它运行正常,但我想知道改用多进程后总体执行时间是否会有差别。在实际应用中,需要处理的 URL 会超过 1000 个。
from bs4 import BeautifulSoup
import requests
import multiprocessing as mp
def _insecure_urls(soup, tag_name, attribute):
    """Return the *attribute* values of all <tag_name> tags that start with 'http://'."""
    found = []
    for tag in soup.find_all(tag_name):
        value = tag.get(attribute)
        # Tags lacking the attribute yield None; only plain strings can be
        # insecure URLs, so everything else is skipped instead of raising.
        if isinstance(value, str) and value.startswith('http://'):
            found.append(value)
    return found


def MixedContentCheck(url: str, timeout: float = 30.0):
    """Fetch *url* over both http and https and report insecure references.

    The scheme of *url* is discarded; the remainder is requested once as
    ``http://...`` and once as ``https://...`` without following redirects.
    When the https response has status 200, its body is parsed and every
    ``<link href>``, ``<script src>`` and ``<a href>`` value that starts with
    ``http://`` is collected.

    Args:
        url: Absolute URL including a scheme separator (``://``).
        timeout: Seconds to wait for each of the two requests before giving
            up, so that one unresponsive host cannot block a worker forever.

    Returns:
        On success ``{'status': True, 'url': url, 'data': {...}}`` where
        ``data`` holds ``http_status``, ``https_status``,
        ``insecure_link_count``, ``insecure_links``, ``http_redirect``,
        ``redirect_loop`` and ``mixed_content``.
        On any failure (no ``://`` in *url*, network error, parse error, ...)
        ``{'status': False, 'url': url, 'message': ..., 'url_status_code': 404}``.

    NOTE(review): ``HEADERS`` and ``redirect_code`` are module-level names that
    are not defined in the visible part of this file. If they are missing, the
    resulting NameError is reported as the generic failure dict — confirm that
    they are defined before the pool is started.
    """
    failure = {'status': False, 'url': url, 'message': 'URL is Invalid!!! OR Website Not Found', 'url_status_code': 404}
    try:
        # partition() keeps everything after the FIRST '://', so URLs that
        # carry another '://' later on (e.g. in a query string) stay intact.
        _scheme, separator, remainder = url.partition('://')
        if not separator:
            return failure

        data_set = {"http_status": 0, "https_status": 0, "insecure_link_count": 0, "insecure_links": []}

        http_response = requests.get('http://' + remainder, headers=HEADERS,
                                     allow_redirects=False, timeout=timeout)
        data_set['http_status'] = http_response.status_code
        data_set['http_redirect'] = http_response.is_redirect

        https_response = requests.get('https://' + remainder, headers=HEADERS,
                                      allow_redirects=False, timeout=timeout)
        https_status = https_response.status_code
        data_set['https_status'] = https_status

        data_set['redirect_loop'] = "Present" if https_status in redirect_code else "Absent"
        both_ok = https_status == 200 and http_response.status_code == 200
        data_set['mixed_content'] = "Present" if both_ok else "Absent"

        insecure_links = []
        if https_status == 200:
            soup = BeautifulSoup(https_response.content, 'lxml')
            # Same scan order as before: stylesheets/links, scripts, anchors.
            for tag_name, attribute in (("link", "href"), ("script", "src"), ("a", "href")):
                insecure_links.extend(_insecure_urls(soup, tag_name, attribute))

        data_set['insecure_link_count'] = len(insecure_links)
        data_set['insecure_links'] = insecure_links
        return {'status': True, 'url': url, 'data': data_set}
    except Exception:
        # Deliberately best-effort: every ordinary error maps to the failure
        # dict. Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
        # still propagate, so pool workers remain interruptible.
        return failure
# Pages to check: the shop.heavyglare.com pages first, then two jockey.in pages.
urls = [
    "https://shop.heavyglare.com" + page_path
    for page_path in (
        "",
        "/contact",
        "/deals",
        "/promo",
        "/blog",
        "/customer/account/login",
        "/catalogsearch/advanced",
        "/wishlist",
        "/checkout/cart",
        "/accessories",
    )
] + [
    "https://www.jockey.in/women/collection/relax",
    "https://www.jockey.in/men/accessories",
]

# Accumulator for worker results; starts out empty.
result_list = list()
def log_result(result):
    """Pool callback: store one finished worker result in ``result_list``.

    multiprocessing invokes ``apply_async`` callbacks in the process that
    owns the pool, so the module-level list is only ever mutated there and
    never inside a worker process.
    """
    result_list.append(result)
def apply_async_with_callback():
    """Run MixedContentCheck on every entry of ``urls`` in a process pool.

    All tasks are submitted up front so the workers can run them
    concurrently; waiting on each task right after submitting it would make
    the pool process one URL at a time. Each successful result is handed to
    ``log_result``. A task that raises (or whose function/arguments/result
    cannot be pickled) is reported on stderr instead of vanishing silently,
    which otherwise leaves ``result_list`` empty with no hint as to why.

    Results are appended in completion order, which need not match the order
    of ``urls``; each result dict carries its own ``'url'`` key.

    Prints ``result_list`` once every task has finished. Returns None.
    """
    import sys  # local import: only needed for the error report below

    def report_failure(error):
        # Called in the parent process when a task raised instead of returning.
        print(f"MixedContentCheck task failed: {error!r}", file=sys.stderr)

    # The context manager guarantees the pool is torn down even if
    # submission itself raises.
    with mp.Pool() as pool:
        for url in urls:
            pool.apply_async(
                MixedContentCheck,
                args=(url,),
                callback=log_result,
                error_callback=report_failure,
            )
        # close() stops further submissions; join() blocks until every
        # queued task has finished and its callback has run.
        pool.close()
        pool.join()
    print(result_list)
# The guard is required for multiprocessing: start methods that re-import this
# module in each worker must not re-run the pool creation below.
if __name__ == "__main__":
    apply_async_with_callback()
我每次得到的都是空列表,但直接(不经过进程池)调用该函数时一切正常。请帮帮我。
解决方案
推荐阅读
- javascript - 回调与异步回调
- java - 如何遍历 JSTL 中的对象列表
- python - 我怎样才能永远来回旋转这个对象?
- node.js - Node.js 听键盘并以交互方式阅读该行?
- search - 如何在 Prestashop 中配置搜索页面以隐藏/替换搜索结果中的不便消息抱歉
- asp.net-core - 如何在 Blazor 应用程序中重命名捆绑的静态文件(ProjectName.style.css 和 blazor.server.js)
- flutter - 如何根据flutter中的int值以列表形式订购Firestore文档
- intellij-idea - Intellij 的 Error Stripe 侧栏中的误导性警告
- python - 通过推文的pandas字符串列中的单词列表查找子字符串
- python-3.x - pandas 在 M1 Big Sur 11.1 上构建失败