首页 > 解决方案 > 无法使用 PyQt5 获取 html

问题描述

我使用 PyQt5 来获取网页的 html。

它工作正常。

但是,我无法获取网址的第二页。

我应该怎么办?

import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    """Load a sequence of URLs one after another and collect each page's HTML.

    Loading is driven by the Qt event loop: nothing is fetched until the
    application's event loop is running, and ``data`` is only filled in from
    callbacks invoked by that loop. Once the URL sequence is exhausted the
    application is asked to quit.
    """

    def __init__(self):
        super(WebPage, self).__init__()
        # Every finished load triggers an HTML dump of the current page.
        self.loadFinished.connect(self.handleLoadFinished)
        # HTML of each processed page, appended in processing order.
        # The list is never reset, so repeated start() calls accumulate.
        self.data = []

    def start(self, urls):
        """Begin loading *urls* (any iterable of URL strings) in order.

        Note: when *urls* is empty no load is started, so no callback fires
        and this class never requests the application to quit.
        """
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        """Start loading the next pending URL.

        Returns:
            True if a load was started, False if no URLs are left.
        """
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        self.load(QtCore.QUrl(url))
        return True

    def processCurrentPage(self, html):
        """Store *html* for the current page, then move on or quit.

        Used as the callback passed to ``toHtml``.
        """
        self.data.append(html)
        if not self.fetchNext():
            # No URLs left: stop the event loop so the caller can read data.
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        """Slot for ``loadFinished``: request the page HTML.

        The HTML is delivered later to ``processCurrentPage``.
        """
        self.toHtml(self.processCurrentPage)

# Original (non-working) attempt from the question.
if __name__ == '__main__':
    urls = ['https://www.hello.com/p1', 'https://www.haha.com/p1']
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    # start() only schedules the first load; pages are processed by callbacks
    # once the event loop runs, and app.exec_() is not called until the last line.
    webpage.start(urls)
    # No callback has run yet at this point, so the list is still empty.
    first_result = webpage.data #empty, why?
    # NOTE: needToGetHtml is not defined anywhere in this snippet.
    new_urls = [i for i in urls if needToGetHtml(i)]
    # A second start() replaces the URL iterator and issues another load()
    # before the event loop has processed anything from the first call.
    webpage.start(new_urls)
    # sys.exit() raises SystemExit, so no code after this line would run.
    sys.exit(app.exec_())

修改后的代码:

# Revised attempt from the question: run the event loop once per batch of URLs.
if __name__ == '__main__':
    urls = ['https://www.hello.com/p1', 'https://www.haha.com/p1']
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(urls)
    # Blocks until WebPage calls qApp.quit() after the last URL is processed.
    ret = app.exec_()
    # This is a reference to the list itself, not a copy of its contents.
    first_result = webpage.data
    # NOTE: needToGetHtml is not defined anywhere in this snippet.
    new_urls = [i for i in urls if needToGetHtml(i)]
    # If new_urls is empty, start() loads nothing and WebPage never calls
    # quit(), so the exec_() below would not be ended by this code.
    webpage.start(new_urls)
    ret = app.exec_()
    # WebPage only appends to data, so this is the same list object as
    # first_result and holds the pages from both runs.
    second_result = webpage.data
    sys.exit(ret)

标签: python pyqt5

解决方案


请记住,Qt 是异步的:页面的加载和 html 的获取都在事件循环(event loop)中完成。根据您提供的代码,只有在事件循环停止之后才能拿到完整的数据,而事件循环是在下面这段代码中被停止的:

# Excerpt from WebPage.processCurrentPage: once fetchNext() reports that no
# URLs are left, the application is asked to quit, which ends app.exec_().
if not self.fetchNext():
    QtWidgets.qApp.quit()

所以你必须在 app.exec_() 返回之后再获取数据(不要直接写 sys.exit(app.exec_()),因为 sys.exit() 会立即结束程序,其后的代码不会被执行):

# Suggested fix from the answer: read the results only after the event loop ends.
if __name__ == '__main__':
    urls = ['https://www.hello.com/p1', 'https://www.haha.com/p1']
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(urls)
    # Blocks until WebPage calls qApp.quit() after the last URL is processed.
    ret = app.exec_()
    # The event loop has finished, so data now holds the collected HTML.
    print(webpage.data)
    # Exit with the event loop's return code only after the data has been used.
    sys.exit(ret)

推荐阅读