首页 > 解决方案 > 抓取特定的

雅虎财经类(VBA、Excel)

问题描述

我一直在尝试从某个嵌套类中提取特定信息

从以下位置的代码 https://finance.yahoo.com/quote/ASUR?p=ASUR

内部文本中所在的类名为“D(ib) Va(t)”,据我所知,至少该文本对于类名是唯一的。我正在使用以下代码来获取数据。

Private Sub CommandButton1_Click()

Dim IE4 As Object
Dim strURL3 As String
Dim divs1 As Object
Dim symbol1 As String
Dim rowd As Integer
Dim divs2 As Object

'turn calculation off

Application.ScreenUpdating = False
Application.DisplayAlerts = False
Application.Calculation = xlCalculationManual

rowd = 1
'Sheets.Add After:=Sheets(ActiveWorkbook.Sheets.Count)
'Sheets(ActiveSheet.Name).Name = "Industry"

'Worksheets("Panel").Activate
'Range("B9").Select
Set IE4 = CreateObject("InternetExplorer.Application") 'Create only one IE instance


'Do Until ActiveCell.Value = "" 'Loop
'symbol1 = ActiveCell.Value

strURL3 = "https://finance.yahoo.com/quote/ASUR?p=ASUR"

IE4.Visible = True 'Flag to remove IE visibility
        VBA.Shell "RunDll32.exe InetCpl.Cpl, ClearMyTracksByProcess 264", vbMinimizedNoFocus
        VBA.Shell "RunDll32.exe InetCpl.Cpl, ClearMyTracksByProcess 258", vbMinimizedNoFocus
        IE4.Navigate strURL3
        While IE4.Busy: DoEvents: Wend 'Break of 4 seconds after loading
        Application.Wait (Now + TimeValue("0:00:04"))


Set divs1 = IE4.Document.getelementsbytagname("div")

  Worksheets("Industry").Activate
  ActiveSheet.Cells("1,2").Select
 For Each div In divs1
          Set divs2 = IE4.Document.getelementsbytagname("p")
           For Each p In divs2

           If p.classname = "D(ib) Va(t)" Then

            Debug.Print p.innertext

            'Cells(rowd, 2) = p.innertext
            'rowd = rowd + 1
            End If
          Next p

Next div

 'Sheets("Panel").Select
' ActiveCell.Offset(1, 0).Select
 'Loop
    'Sheets("Panel").Select
    'Range("B9").Select 'range that selects rows and columns to paste in every company sheet
    'Range(Selection, Selection.End(xlDown)).Select
    'Range(Selection, Selection.End(xlDown)).Select
    'Range(Selection, Selection.End(xlDown)).Select
   ' Selection.Copy
   ' Sheets("Industry").Select
    'Range("A1").Select
    'ActiveSheet.Paste
   ' Application.CutCopyMode = False

 IE4.Quit

'turn calculation on
Application.Calculation = xlCalculationAutomatic
Application.DisplayAlerts = True
Application.ScreenUpdating = True

End Sub

它不捕获

上课,谁能帮帮我?

标签: htmlexcelvbaweb-scraping

解决方案


p该类的标签元素包括公司部门、行业和员工信息。您可以通过避免打开浏览器来使用更快的 xmlhttp 方法进行检索。然后使用 css 选择器组合来定位元素

Option Explicit
Public Sub GetInfo()
    Const URL As String = "https://finance.yahoo.com/quote/ASUR?p=ASUR"
    Dim html As HTMLDocument
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .Send
        html.body.innerHTML = .responsetext
    End With
    Debug.Print html.querySelector("p.D\(ib\).Va\(t\)").innerText
End Sub

如果要避免复合类,可以使用以下内容:

Debug.Print html.querySelector("p + .D\(ib\)").innerText

参考资料(VBE > 工具 > 参考资料):

  1. Microsoft HTML 对象库

推荐阅读