首页 > 解决方案 > 如何在 resolve 之前等待 Promise 中的所有代码执行完成?(但有点复杂)

问题描述

抱歉,这个问题非常令人困惑。我有一段代码,不借助任何 Node 模块或库就能从网站获取信息。该网站的用户列表分成多个页面,通过在 URL 末尾加上 ?page= 来访问。我已经设法遍历了各个页面,并恰到好处地拆分了原始 HTML。但是,我的 Promise 在收集完所有数据之前就已经 resolve 了。我怎样才能等到一切都完成之后再 resolve 这个 Promise?我尝试了无数种解决方案,但似乎没有一个有效。请不要建议使用 Node 包,因为我的目标就是一个都不用 :) 一位朋友帮我用正则表达式完成了拆分。这是我正在使用的代码:

/**
 * Collects the entries listed on every page of the build team's member list.
 *
 * The first page is downloaded to read the pagination block and learn how many
 * pages exist. All pages are then requested, and the returned promise settles
 * only after every response has been received and parsed, so the resolved
 * array is complete.
 *
 * @returns {Promise<string[]>} Extracted entries in page order. Resolves with
 *     an empty array when the pagination block contains no usable page links.
 *     Rejects if any request or response stream emits an error.
 */
function getData() {
    const https = require("https");
    const url = "https://buildtheearth.net/buildteams/121/members";

    // Downloads one URL and resolves with its whole body as a UTF-8 string.
    const fetchText = target => new Promise((resolve, reject) => {
        https.get(target, response => {
            let body = '';
            response.setEncoding("utf8");
            response.on("data", chunk => { body += chunk; });
            response.on("end", () => resolve(body));
            response.on("error", reject);
        }).on("error", reject);
    });

    // Reads the page count from the last number in the last pagination link.
    const countPages = html => {
        const start = html.indexOf('<div class="pagination">');
        if (start === -1) {
            // NOTE(review): assumes a list that fits on one page is rendered
            // without a pagination block - confirm against the site.
            return 1;
        }
        const rest = html.substring(start);
        const block = rest.substring(0, rest.indexOf("</div>"));
        const links = block.match(/<a(.+)>(.+)<\/a>/g);
        if (!links) return 0;
        const digits = links[links.length - 1].match(/(\d+)(?!.*\d)/g);
        return digits ? Number.parseInt(digits[0], 10) : 0;
    };

    // Takes every third matched <td> cell, starting from the second one.
    const extractMembers = html => {
        const names = [];
        const cells = html.match(/<td>(.+)<\/td>/g);
        if (!cells) return names;
        for (let index = 1; index < cells.length; index += 3) {
            const name = cells[index].replace(/<td>/g, "").replace(/<\/td>/g, "");
            console.log(name); // kept from the original: logs each element for testing
            names.push(name);
        }
        return names;
    };

    return fetchText(`${url}?page=1`)
        .then(firstPage => {
            const pageCount = countPages(firstPage);
            const pending = [];
            for (let page = 1; page <= pageCount; page++) {
                // Page 1 is already downloaded; Promise.all accepts plain values too.
                pending.push(page === 1 ? firstPage : fetchText(`${url}?page=${page}`));
            }
            // Waits for every page and keeps the results in page order.
            return Promise.all(pending);
        })
        .then(pages => pages.flatMap(html => extractMembers(html)));
}

如果有帮助的话,这是我试图从中获取信息的网站。我对 JS 还有点陌生,所以如果您能提供帮助,我将不胜感激 :)

标签: javascript node.js asynchronous get

解决方案


我最终把每个步骤都改写成带有 try/catch 块的异步函数,然后用 .then() 将这些函数链接在一起。至于基础部分(从网站获取数据),我从 Medium 上的一篇文章中获得了灵感。这是我从中提取数据的网站,下面是从网站获取数据的函数:

/**
 * Sends a GET request and resolves with the complete response body.
 *
 * @param {string} url - Absolute URL. URLs starting with "https://" are
 *     requested through `https`, everything else through `http`.
 * @returns {Promise<string>} The body, decoded with Buffer#toString().
 *     Rejects with `Error("Status Code: <code>")` when the status is outside
 *     200-299, or with the emitted error if the request or the response
 *     stream fails.
 */
const getData = async (url) => {
    // `async` is kept so that a bad `url` argument surfaces as a rejection
    // rather than a synchronous throw.
    const lib = url.startsWith('https://') ? https : http;

    return new Promise((resolve, reject) => {
        const req = lib.get(url, res => {
            if (res.statusCode < 200 || res.statusCode >= 300) {
                // Drain the unread body so the connection is released.
                res.resume();
                return reject(new Error(`Status Code: ${res.statusCode}`));
            }

            const data = [];

            res.on('data', chunk => data.push(chunk));
            res.on('end', () => resolve(Buffer.concat(data).toString()));
            res.on('error', reject);
        });

        // get() ends the request itself, so no explicit req.end() is needed.
        req.on('error', reject);
    });
};

然后我用这个函数得到了页面的数量(各个页面可以通过在 URL 末尾附加 ?page=<page number> 来访问):

/**
 * Determines how many member pages the build team's list has.
 *
 * Downloads the first page with `getData`, isolates the text between
 * `<div class="pagination">` and the next `</div>`, and reads the last number
 * found in the last link of that block.
 *
 * @returns {Promise<number|undefined>} The page count; 1 when the HTML has no
 *     pagination block; `undefined` (after logging the error) when the
 *     download fails or the pagination block cannot be parsed.
 */
const pages = async () => {
    try {
        const html = await getData('https://buildtheearth.net/buildteams/121/members');

        const start = html.indexOf('<div class="pagination">');
        if (start === -1) {
            // Without this guard, substring(-1) would scan the whole document
            // and report a number taken from an unrelated link.
            // NOTE(review): assumes a one-page list is rendered without a
            // pagination block - confirm against the site.
            return 1;
        }

        const rest = html.substring(start);
        const block = rest.substring(0, rest.indexOf('</div>'));

        const links = block.match(/<a(.+)>(.+)<\/a>/g);
        if (!links) {
            throw new Error('No page links found in the pagination block');
        }

        // The negative lookahead keeps only the last run of digits in the link.
        const digits = links[links.length - 1].match(/(\d+)(?!.*\d)/g);
        if (!digits) {
            throw new Error('Last pagination link contains no page number');
        }

        return Number.parseInt(digits[0], 10);
    } catch (error) {
        console.error(error);
    }
}

然后我使用页数遍历页面并使用此函数将每个页面的 HTML 添加到数组中:

/**
 * Downloads the HTML of member pages 1..pageCount, one after another.
 *
 * @param {number} pageCount - Number of the last page to request.
 * @returns {Promise<string[]>} The page bodies in page order. If a download
 *     fails, the error is logged and the pages fetched up to that point are
 *     returned; the remaining pages are not requested.
 */
const getPages = async pageCount => {
    const returns = [];
    try {
        // `let` keeps the counter local; the undeclared `page` used before
        // throws a ReferenceError in strict mode and ES modules.
        for (let page = 1; page <= pageCount; page++) {
            const pageData = await getData(`https://buildtheearth.net/buildteams/121/members?page=${page}`);
            returns.push(pageData);
        }
    } catch (error) {
        // The old `finally { return returns }` discarded the error without a
        // trace; the partial result is still returned, but the failure is logged.
        console.error(error);
    }
    return returns;
}

然后我遍历每个页面的 HTML 字符串数组,并使用此函数从每个页面中提取我需要的数据,该函数将返回我需要的成员列表:

/**
 * Extracts list entries from the HTML of each page.
 *
 * For every page, all `<td>...</td>` matches are collected and every third
 * match starting from the second one is kept, with the `<td>` tags stripped.
 *
 * @param {string[]} pages - HTML bodies, one per page.
 * @returns {Promise<string[]|Error|undefined>} The extracted entries in page
 *     order; `undefined` when `pages` is not an array. If extraction throws
 *     (for example on a non-string element), the caught error is returned as
 *     the resolved value, which is the existing contract of this function.
 */
const iteratePages = async pages => {
    if (!Array.isArray(pages)) return;
    try {
        const returns = [];
        // Plain synchronous loop: forEach returns undefined, so awaiting it did nothing.
        for (const page of pages) {
            const list = page.match(/<td>(.+)<\/td>/g);
            if (!list) continue;
            for (let element = 1; element < list.length; element += 3) {
                returns.push(list[element].replace(/<td>/g, "").replace(/<\/td>/g, ""));
            }
        }
        return returns;
    } catch (error) {
        return error;
    }
}

然后将每个链接在一起以获得我需要的数组:

// Run the pipeline: page count -> page HTML -> extracted list, then print the
// list and its length. The final catch keeps a failure in any step from
// becoming an unhandled rejection.
pages()
    .then(pageCount => getPages(pageCount))
    .then(htmlPages => iteratePages(htmlPages))
    .then(finalList => {
        console.log(finalList);
        console.log(finalList.length);
    })
    .catch(error => console.error(error));

推荐阅读