首页 > 解决方案 > Excel VBA 抓取 - HTML 表格不可见

问题描述

我正在尝试使用 Excel VBA 从“https://in.tradingview.com/symbols/NSE-ABB/technicals/”抓取数据。请求能够得到响应,但 body.innerHTML 中没有所需的表格;而在 Chrome 中检查该页面时,我可以看到带有该类名的表格。

代码有什么问题?

' Fragment from the question: downloads the page with a synchronous GET and
' looks for the technicals table in the returned markup.
' NOTE(review): URL, html, sResponse, tElementC and WriteTxtFile are declared
' outside this fragment; html is presumably an HTMLDocument - confirm.
With CreateObject("MSXML2.XMLHTTP")
        ' Third argument False = synchronous request; .send blocks until done.
        .Open "GET", URL, False
        .send
        ' Convert the raw response bytes into a VBA string.
        sResponse = StrConv(.responseBody, vbUnicode)
    End With
    
    ' Drop everything that precedes the "<!DOCTYPE " marker.
    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
    ' Dump the received markup via the WriteTxtFile helper (not shown here).
    WriteTxtFile sResponse
    With html
        .body.innerHTML = sResponse
        ' Take the <td> cells of the first element matching the class list.
        ' The downloaded HTML is only the static page source; content that the
        ' browser adds later via AJAX is not part of it, so this lookup finds
        ' no table even though Chrome's inspector shows one.
        Set tElementC = .getElementsByClassName("table-1i1M26QY- maTable-27Z4Dq6Y- tableWithAction-2OCRQQ8y-")(0).getElementsByTagName("td")
        
    End With

URL --> https://in.tradingview.com/symbols/NSE-ABB/technicals/ 访问类名 = "table-1i1M26QY- maTable-27Z4Dq6Y- tableWithAction-2OCRQQ8y-"

标签: html excel vba web-scraping

解决方案


链接 https://in.tradingview.com/symbols/NSE-ABB/technicals/ 返回的网页源 HTML 不包含所需的数据,这些数据是通过 AJAX 加载的。该网站有一个可用的 API,响应以 JSON 格式返回。因此,您需要先做一些逆向工程,了解网站是如何工作的。在浏览器(例如 Chrome)中按 F12 打开 DevTools,导航到该网页,进入 Network 选项卡,并将过滤器设置为 XHR,如下图所示:

网络选项卡

检查记录的响应。其中一个具有最大尺寸的实际上包含所有必要的数据:

json响应

要制作这样的 XHR,您还需要保留整个有效负载结构,并添加相关标头:

标头和表单数据

表单数据部分的数组中有很多报价字段标题,您可以只选择实际需要的那些。如果想找到更多可用的标题,请单击 Initiator 链接(见上面的第一个屏幕截图),您将看到发起该 XHR 的 JS 代码。单击底部的 {}(Pretty print)按钮使代码可读。在搜索框中输入您已经从表单数据中得到的任意一个标题,例如 Recommend.Other,然后就能在代码中它的附近找到其他标题:

报价字段标题

这是一个 VBA 示例,展示了如何进行此类抓取。请将 JSON.bas 模块导入 VBA 项目以进行 JSON 处理。

Option Explicit

Sub Test()
    ' Requests a set of quote / technical-indicator fields for the ticker
    ' NSE:ABB from the TradingView scanner endpoint and writes them to the
    ' first worksheet of this workbook: field title in column A, value in
    ' column B. Requires the JSON.bas module (JSON.Serialize / JSON.Parse).
    '
    ' Outcome is reported through MsgBox: "Completed" on success, otherwise a
    ' message describing the timeout, the invalid response or the API error.

    ' Upper limit for waiting on the asynchronous request, in seconds.
    Const REQUEST_TIMEOUT_SEC As Long = 30

    Dim aQuoteFieldTitles()
    Dim aQuoteFieldData()
    Dim sPayload As String
    Dim sJSONString As String
    Dim vJSON
    Dim sState As String
    Dim i As Long
    Dim lLast As Long
    Dim lStatus As Long
    Dim dDeadline As Date

    ' Put the necessary field titles into array
    aQuoteFieldTitles = Array( _
        "name", "description", "country", "type", "after_tax_margin", "average_volume", "average_volume_30d_calc", "average_volume_60d_calc", "average_volume_90d_calc", "basic_eps_net_income", "beta_1_year", "beta_3_year", "beta_5_year", "current_ratio", "debt_to_assets", "debt_to_equity", "dividends_paid", "dividends_per_share_fq", _
        "dividends_yield", "dps_common_stock_prim_issue_fy", "earnings_per_share_basic_ttm", "earnings_per_share_diluted_ttm", "earnings_per_share_forecast_next_fq", "earnings_per_share_fq", "earnings_release_date", "earnings_release_next_date", "ebitda", "enterprise_value_ebitda_ttm", "enterprise_value_fq", "exchange", "expected_annual_dividends", _
        "gross_margin", "gross_profit", "gross_profit_fq", "industry", "last_annual_eps", "last_annual_revenue", "long_term_capital", "market_cap_basic", "market_cap_calc", "net_debt", "net_income", "number_of_employees", "number_of_shareholders", "operating_margin", _
        "pre_tax_margin", "preferred_dividends", "price_52_week_high", "price_52_week_low", "price_book_ratio", "price_earnings_ttm", "price_revenue_ttm", "price_sales_ratio", "quick_ratio", "return_of_invested_capital_percent_ttm", "return_on_assets", "return_on_equity", "return_on_invested_capital", "revenue_per_employee", "sector", _
        "eps_surprise_fq", "eps_surprise_percent_fq", "total_assets", "total_capital", "total_current_assets", "total_debt", "total_revenue", "total_shares_outstanding_fundamental", "volume", "relative_volume", "pre_change", "post_change", "close", "open", "high", "low", "gap", "price_earnings_to_growth_ttm", "price_sales", "price_book_fq", _
        "price_free_cash_flow_ttm", "float_shares_outstanding", "total_shares_outstanding", "change_from_open", "change_from_open_abs", "Perf.W", "Perf.1M", "Perf.3M", "Perf.6M", "Perf.Y", "Perf.YTD", "Volatility.W", "Volatility.M", "Volatility.D", "RSI", "RSI7", "ADX", "ADX+DI", "ADX-DI", "ATR", "Mom", "High.All", "Low.All", "High.6M", "Low.6M", _
        "High.3M", "Low.3M", "High.1M", "Low.1M", "EMA5", "EMA10", "EMA20", "EMA30", "EMA50", "EMA100", "EMA200", "SMA5", "SMA10", "SMA20", "SMA30", "SMA50", "SMA100", "SMA200", "Stoch.K", "Stoch.D", "MACD.macd", "MACD.signal", "Aroon.Up", "Aroon.Down", "BB.upper", "BB.lower", "goodwill", "debt_to_equity_fq", "CCI20", "DonchCh20.Upper", _
        "DonchCh20.Lower", "HullMA9", "AO", "Pivot.M.Classic.S3", "Pivot.M.Classic.S2", "Pivot.M.Classic.S1", "Pivot.M.Classic.Middle", "Pivot.M.Classic.R1", "Pivot.M.Classic.R2", "Pivot.M.Classic.R3", "Pivot.M.Fibonacci.S3", "Pivot.M.Fibonacci.S2", "Pivot.M.Fibonacci.S1", "Pivot.M.Fibonacci.Middle", "Pivot.M.Fibonacci.R1", _
        "Pivot.M.Fibonacci.R2", "Pivot.M.Fibonacci.R3", "Pivot.M.Camarilla.S3", "Pivot.M.Camarilla.S2", "Pivot.M.Camarilla.S1", "Pivot.M.Camarilla.Middle", "Pivot.M.Camarilla.R1", "Pivot.M.Camarilla.R2", "Pivot.M.Camarilla.R3", "Pivot.M.Woodie.S3", "Pivot.M.Woodie.S2", "Pivot.M.Woodie.S1", "Pivot.M.Woodie.Middle", "Pivot.M.Woodie.R1", _
        "Pivot.M.Woodie.R2", "Pivot.M.Woodie.R3", "Pivot.M.Demark.S1", "Pivot.M.Demark.Middle", "Pivot.M.Demark.R1", "KltChnl.upper", "KltChnl.lower", "P.SAR", "Value.Traded", "MoneyFlow", "ChaikinMoneyFlow", "Recommend.All", "Recommend.MA", "Recommend.Other", "Stoch.RSI.K", "Stoch.RSI.D", "W.R", "ROC", "BBPower", "UO", "Ichimoku.CLine", _
        "Ichimoku.BLine", "Ichimoku.Lead1", "Ichimoku.Lead2", "VWMA", "ADR", "RSI[1]", "Stoch.K[1]", "Stoch.D[1]", "CCI20[1]", "ADX-DI[1]", "AO[1]", "Mom[1]", "Rec.Stoch.RSI", "Rec.WR", "Rec.BBPower", "Rec.UO", "Rec.Ichimoku", "Rec.VWMA", "Rec.HullMA9" _
    )

    ' Field titles exactly as in the table MOVING AVERAGES
    ' aQuoteFieldTitles = Array("EMA5", "SMA5", "EMA10", "SMA10", "EMA20", "SMA20", "EMA30", "SMA30", "EMA50", "SMA50", "EMA100", "SMA100", "EMA200", "SMA200", "Ichimoku.BLine", "VWMA", "HullMA9")

    ' Compose payload: one ticker plus the list of requested columns
    sPayload = "{""symbols"":{""tickers"":[""NSE:ABB""],""query"":{""types"":[]}},""columns"":" & JSON.Serialize(aQuoteFieldTitles) & "}"

    ' Retrieve JSON response
    With CreateObject("MSXML2.XMLHTTP")
        ' Asynchronous request so that DoEvents keeps Excel responsive
        .Open "POST", "https://scanner.tradingview.com/india/scan", True
        .setRequestHeader "content-type", "application/x-www-form-urlencoded"
        .setRequestHeader "user-agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        ' content-length is deliberately not set by hand: the HTTP object
        ' derives it from the body passed to .send, and Len() would give a
        ' character count rather than the encoded byte count.
        .send (sPayload)
        ' Wait for completion, but never longer than REQUEST_TIMEOUT_SEC
        dDeadline = DateAdd("s", REQUEST_TIMEOUT_SEC, Now)
        Do Until .readyState = 4
            If Now > dDeadline Then
                .abort
                MsgBox "Request timed out"
                Exit Sub
            End If
            DoEvents
        Loop
        lStatus = .Status
        sJSONString = .responseText
    End With

    ' Parse JSON response
    JSON.Parse sJSONString, vJSON, sState

    ' Check response validity. Case expressions are evaluated in order and
    ' evaluation stops at the first match, so later cases may rely on the
    ' earlier ones having been ruled out.
    Select Case True
        Case sState <> "Object"
            MsgBox "Invalid JSON response (HTTP status " & lStatus & ")"
        Case IsNull(vJSON("data"))
            MsgBox vJSON("error")
        Case Not IsArray(vJSON("data"))
            MsgBox "Unexpected response: ""data"" is not an array"
        Case UBound(vJSON("data")) < 0
            ' Empty result set, e.g. the ticker is unknown to the scanner
            MsgBox "No data returned for the requested symbol"
        Case Else
            ' Output data to worksheet #1
            aQuoteFieldData = vJSON("data")(0)("d")
            ' Never index past the end of the shorter of the two arrays
            lLast = UBound(aQuoteFieldTitles)
            If UBound(aQuoteFieldData) < lLast Then lLast = UBound(aQuoteFieldData)
            With ThisWorkbook.Sheets(1)
                .Cells.Delete
                .Cells.WrapText = False
                For i = 0 To lLast
                    .Cells(i + 1, 1).Value = aQuoteFieldTitles(i)
                    .Cells(i + 1, 2).Value = aQuoteFieldData(i)
                Next
                .Columns.AutoFit
            End With
            MsgBox "Completed"
    End Select

End Sub

我的输出如下:

输出

顺便说一句,类似的方法也应用在了其他答案中。


推荐阅读