首页 > 解决方案 > 如何使用 vba 从网站表中提取数据

问题描述

我在编写将从网页中提取数据的代码时遇到问题。代码的第一部分运行良好,但第二部分我无法正确编写以从站点的表中提取数据。问题是“td”标签,我需要“td”标签中包含的数据,全部或部分。我尝试了很多方法,但都没有成功。任何人都可以帮助我从表中提取这些数据的代码吗?

这是我的代码:

Sub provera_TR_klijenta()

    'check in References: _
    Microsoft Internet Controls _
    Microsoft HTML Object Library
    
    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    
    Dim ieDoc As MSHTML.HTMLDocument
    Dim iframeDoc As MSHTML.HTMLDocument
    
    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer
    
    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True
    objIE.Width = 1000
    objIE.Height = 800
    
    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://nbs.rs/en/drugi-nivo-navigacije/servisi/jedinstveni-registar-racuna/index.html"
    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    
    Set ieDoc = objIE.document
    Set iframeDoc = ieDoc.frames(0).document
    
    iframeDoc.getElementsByName("matbr")(1).Value = "21122017"
    iframeDoc.getElementsByName("Submit")(0).Click
    
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    
    '*** I don't know what to do from here ***

End Sub

html代码在这个链接上: https ://codebeautify.org/alleditor/cbf73981

标签: htmlexcelvbawebpull

解决方案


我找到了解决办法,谢谢大家!这是正确的代码:

Sub PullDataFromWebsite()

    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument
    Dim HTMLInput As MSHTML.IHTMLElementCollection
    Dim HTMLAs As MSHTML.IHTMLElementCollection
    Dim HTMLA As MSHTML.IHTMLElement
    Dim framesITML As MSHTML.HTMLDocument
    
    IE.Visible = True
    IE.navigate "https://nbs.rs/sr_RS/drugi-nivo-navigacije/servisi/jedinstveni-registar-racuna/"
    
    Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
    Loop
    
    Set HTMLDoc = IE.Document
    Set framesITML = HTMLDoc.frames(0).Document
    
    Set HTMLInput = framesITML.getElementsByName("matbr")
    HTMLInput(1).Value = "07364954"
    
    Set HTMLInput = framesITML.getElementsByName("Submit")
    HTMLInput(0).Click
    
    Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
    Loop
    
    Dim HTMLTable As MSHTML.IHTMLElement
    Dim HTMLRow As MSHTML.IHTMLElement
    Dim HTMLCell As MSHTML.IHTMLElement
    Dim RowNum As Long, ColNum As Integer
    Dim num_len As Integer

        RowNum = 2
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
    
    Set HTMLInput = framesITML.getElementsByClassName("page-link")
    
    If HTMLInput.Length > 0 Then
    
        HTMLInput(0).Click
        
        Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
        Loop
        
        RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
        
        Set HTMLInput = framesITML.getElementsByClassName("page-link")
        HTMLInput(0).Click
        
        Do While IE.ReadyState <> READYSTATE_COMPLETE Or IE.Busy
        Loop
        
        RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
        
        For Each HTMLRow In framesITML.getElementById("result").getElementsByTagName("tr")
            ColNum = 1
            
            For Each HTMLCell In HTMLRow.Children
                num_len = Len(HTMLCell.innerText)
                
                If num_len = 22 Then
                    Cells(RowNum, ColNum) = Trim(HTMLCell.innerText)
                End If
                
            Next HTMLCell
            
            RowNum = Cells(100000, 1).End(xlUp).Offset(1, 0).Row
            
        Next HTMLRow
        
    End If
    
    Range("A2").CurrentRegion.RemoveDuplicates Columns:=1, Header:=xlNo
    
    IE.Quit
        
End Sub

推荐阅读