首页 > 解决方案 > Flask、Asyncio、BS4、Requests-html ValueError:信号仅在主解释器的主线程中有效

问题描述

尝试运行以下代码时出现错误 `ValueError: signal only works in main thread of the main interpreter`:

from bs4 import BeautifulSoup
from requests_html import AsyncHTMLSession
import asyncio
from flask import Flask, render_template, request
from wtforms import Form, StringField, SubmitField
import pandas

# Root of the Airbnb search URL; the search-area slug and query string are
# appended to it when the scrape URL is built.
base_url = "https://www.airbnb.com/s/"

# Headers sent with each scrape request: a mobile Chrome-on-Android
# User-agent string.
request_header = {
    "User-agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/93.0.4577.82 Mobile Safari/537.36"
    ),
}

async def find_num_pages(asession, search_area, checkin_date, checkout_date, adults, children, infants):
    """Return the label of the last pagination link on the first results page.

    Builds the search URL from the arguments, fetches offset 0 with
    ``asession``, renders the page's JavaScript, and reads the text of the
    last matching pagination anchor.

    Args:
        asession: Session used for the request; must offer an awaitable
            ``get(url, headers=...)`` whose response has ``html.arender()``.
        search_area: Free-form location text, e.g. ``"Tampa, FL 33602"``.
        checkin_date: Check-in date, inserted into the query string as-is.
        checkout_date: Check-out date, inserted into the query string as-is.
        adults: Number of adults, inserted into the query string as-is.
        children: Number of children, inserted into the query string as-is.
        infants: Number of infants, inserted into the query string as-is.

    Returns:
        The text of the last pagination link (a string, not an int).

    Raises:
        IndexError: If the rendered page contains no pagination links.
    """
    # Spaces and commas each become a dash, so "Tampa, FL" -> "Tampa--FL".
    search_area_fixed = search_area.replace(" ", "-").replace(",", "-")
    # base_url may already end in "/"; strip it so the path has no "//".
    search_root = base_url.rstrip("/")
    scrape_url = (
        f"{search_root}/{search_area_fixed}/homes"
        f"?checkin={checkin_date}&checkout={checkout_date}"
        f"&adults={adults}&children={children}&infants={infants}"
        "&items_offset="
    )
    r = await asession.get(scrape_url + "0", headers=request_header)
    # Render the page's JavaScript before parsing the HTML.
    await r.html.arender()
    soup = BeautifulSoup(r.html.html, "lxml")
    # NOTE(review): "_1y623pm" is presumably the generated CSS class of the
    # pagination anchors - verify against the site's current markup.
    page_links = soup.find_all("a", {"class": "_1y623pm"})
    if not page_links:
        # Same exception type as indexing an empty list, but with a message
        # that says what was actually missing.
        raise IndexError(
            f"no pagination links (a._1y623pm) found at {scrape_url}0"
        )
    number_of_pages = page_links[-1].text
    return number_of_pages

async def main(search_area, checkin_date, checkout_date, adults, children, infants):
    """Open a session, look up the number of result pages, and return it.

    Args:
        search_area: Free-form location text passed to ``find_num_pages``.
        checkin_date: Check-in date passed to ``find_num_pages``.
        checkout_date: Check-out date passed to ``find_num_pages``.
        adults: Number of adults passed to ``find_num_pages``.
        children: Number of children passed to ``find_num_pages``.
        infants: Number of infants passed to ``find_num_pages``.

    Returns:
        Whatever ``find_num_pages`` returns for the search.

    Raises:
        Any exception raised by ``find_num_pages``; the session is closed
        before it propagates.
    """
    asession = AsyncHTMLSession()
    try:
        num_pages = await find_num_pages(
            asession=asession,
            search_area=search_area,
            checkin_date=checkin_date,
            checkout_date=checkout_date,
            adults=adults,
            children=children,
            infants=infants,
        )
    finally:
        # Close the session even when the lookup fails, so a failed request
        # does not leave it open.
        await asession.close()
    print(num_pages)
    return num_pages

app = Flask(__name__)

# Pre-filled form values, in field order: search area, check-in date,
# check-out date, adults, children, infants.
default = ["Tampa, FL 33602", "2021-09-27", "2021-10-01", "4", "1", "1"]


class InputForm(Form):
    """Search form: six text fields pre-filled from ``default`` plus a submit button."""

    search_area = StringField(
        label="Search Area (City, ST #####): ",
        default=default[0],
    )
    checkin_date = StringField(
        label="Check In Date (YYYY-MM-DD): ",
        default=default[1],
    )
    checkout_date = StringField(
        label="Check Out Date (YYYY-MM-DD): ",
        default=default[2],
    )
    adults = StringField(label="Adults: ", default=default[3])
    children = StringField(label="Children: ", default=default[4])
    infants = StringField(label="Infants: ", default=default[5])
    button = SubmitField(label="Submit")

@app.route("/", methods=['GET', 'POST'])
async def HomePage():
    """Render the search form and, on POST, the scrape result below it.

    Returns:
        The rendered ``index_test.html``. For POST the template also
        receives ``table`` (the list returned by ``asyncio.gather``) and
        ``result=True``; for any other method it receives only the form.
    """
    input_form = InputForm(request.form)

    # Anything that is not a POST gets the empty form. This covers GET and
    # also HEAD, which Flask routes to this view because GET is allowed; a
    # view that returns None would fail with a 500.
    if request.method != "POST":
        return render_template('index_test.html', inputform=input_form)

    # gather() returns a list with one entry per awaitable, so `result` is a
    # one-element list holding main()'s return value.
    result = await asyncio.gather(
        main(
            search_area=input_form.search_area.data,
            checkin_date=input_form.checkin_date.data,
            checkout_date=input_form.checkout_date.data,
            adults=input_form.adults.data,
            children=input_form.children.data,
            infants=input_form.infants.data,
        )
    )
    table = result
    return render_template('index_test.html', inputform=input_form, table=table, result=True)

if __name__ == "__main__":
    # Standalone check without Flask: uncomment to run the scraper once with
    # the default form values.
    # asyncio.run(main(search_area = default[0], checkin_date = default[1], checkout_date = default[2],adults = default[3],children = default[4], infants = default[5]))
    # Start Flask's built-in server with debug mode on and the reloader off.
    app.run(debug=True, use_reloader=False)

这是 index_test.html 代码:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Input Property Info</title>
</head>
<body>
    {# Search form: posts back to "/" with one label/input pair per field. #}
    <form action="/" method="post">
        <p>Input Property Info</p>
        {{inputform.search_area.label}} {{inputform.search_area}}
        <br>
        {{inputform.checkin_date.label}} {{inputform.checkin_date}}
        <br>
        {{inputform.checkout_date.label}} {{inputform.checkout_date}}
        <br>
        {{inputform.adults.label}} {{inputform.adults}}
        <br>
        {{inputform.children.label}} {{inputform.children}}
        <br>
        {{inputform.infants.label}} {{inputform.infants}}
        <br>
        {{inputform.button}}
        <br>
        {# Shown only when the view passes result=True. #}
        {# NOTE(review): `safe` disables autoescaping; keep it only if `table` is trusted HTML. #}
        {% if result==True %}
            <div>{{ table | safe }}</div>
        {% endif %}
    </form>
</body>
</html>

这是回溯:

Traceback (most recent call last)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 2088, in __call__
return self.wsgi_app(environ, start_response)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 2073, in wsgi_app
response = self.handle_exception(e)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 2070, in wsgi_app
response = self.full_dispatch_request()
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 1515, in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 1513, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\flask\app.py", line 1499, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\asgiref\sync.py", line 223, in __call__
return call_result.result()
File "C:\Program Files\Python39\Lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "C:\Program Files\Python39\Lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\asgiref\sync.py", line 292, in main_wrap
result = await self.awaitable(*args, **kwargs)
File "C:\Users\JOHN\Documents\AirBnb_Tool\help.py", line 48, in HomePage
result = await asyncio.gather(main(search_area = input_form.search_area.data, checkin_date = input_form.checkin_date.data, checkout_date = input_form.checkout_date.data, adults = input_form.adults.data, children = input_form.children.data, infants = input_form.infants.data))
File "C:\Users\JOHN\Documents\AirBnb_Tool\help.py", line 23, in main
num_pages = await find_num_pages(asession=asession, search_area=search_area, checkin_date=checkin_date, checkout_date=checkout_date, adults=adults, children=children, infants=infants)
File "C:\Users\JOHN\Documents\AirBnb_Tool\help.py", line 16, in find_num_pages
await r.html.arender()
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\requests_html.py", line 615, in arender
self.browser = await self.session.browser
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\requests_html.py", line 714, in browser
self._browser = await pyppeteer.launch(ignoreHTTPSErrors=not(self.verify), headless=True, args=self.__browser_args)
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\pyppeteer\launcher.py", line 307, in launch
return await Launcher(options, **kwargs).launch()
File "C:\Users\JOHN\Documents\AirBnb_Tool\.airbnb\Lib\site-packages\pyppeteer\launcher.py", line 159, in launch
signal.signal(signal.SIGINT, _close_process)
File "C:\Program Files\Python39\Lib\signal.py", line 47, in signal
handler = _signal.signal(_enum_to_int(signalnum), _enum_to_int(handler))
ValueError: signal only works in main thread of the main interpreter

当我不通过 Flask 应用、只用 main() 函数抓取数据时,代码运行良好。代码中有一段被注释掉的部分,可以用 asyncio.run() 运行 main()。据我了解,不能在 Flask 中使用 asyncio.run(),所以我根据 https://flask.palletsprojects.com/en/2.0.x/async-await/ 上的信息以及我看到的其他人的做法,使用了内置的 await。我在 Flask 的异步视图函数中使用 await asyncio.gather(),并在它异步运行的函数中使用 await。

我还尝试在多个论坛上搜索如何解决此问题,但没有找到任何可行的方法。我尝试模仿 Patrick 在这篇文章中的做法:https://testdriven.io/blog/flask-async/ 。

我的系统是 Windows 10,我通过虚拟环境通过 PowerShell 运行该应用程序。

更新:我对其进行了更多研究,看来我可能需要使用 celery 和可能的 RQ 添加任务队列。如果需要,您能告诉我吗?我不想按计划运行它,只有在按下提交按钮时。

谢谢您的帮助!

标签: python flask web-scraping python-asyncio valueerror

解决方案


我通过使用 WsgiToAsgi 和 hypercorn 解决了这个问题。显然,问题在于除非使用 ASGI,否则 Flask 只会在一个线程上运行。

导入:

from asgiref.wsgi import WsgiToAsgi
from hypercorn.config import Config
from hypercorn.asyncio import serve

除了最后启动应用程序的方式之外,代码保持不变:

if __name__ == "__main__":
    # Wrap the WSGI Flask app in an ASGI adapter, then serve it with
    # hypercorn using a default Config on an asyncio event loop.
    asgi_app = WsgiToAsgi(app)
    hypercorn_config = Config()
    server = serve(asgi_app, hypercorn_config)
    asyncio.run(server)

这里有更多信息:
https://flask.palletsprojects.com/en/2.0.x/deploying/asgi/#asgi https://gitlab.com/pgjones/hypercorn/-/blob/master/README.rst

我希望这对将来遇到此问题的其他人有所帮助。


推荐阅读