首页 > 解决方案 > 如何在 Visual Basic 中从 Indeed.com 上的搜索中获取工作 url

问题描述

我试图从搜索中获取链接。

我第一次尝试这个...

''' <summary>
''' Walks every hyperlink element of the document currently loaded in
''' WebBrowser1. The loop body is intentionally empty: the statement that
''' would add each link to a list is still disabled. Any exception raised
''' while doing so is reported in a message box titled "Info".
''' </summary>
Private Sub GetDocumentLinks()
    Try
        ' Read the document once and bail out early when nothing is loaded.
        Dim loadedDocument As System.Windows.Forms.HtmlDocument = WebBrowser1.Document
        If loadedDocument Is Nothing Then
            Return
        End If

        For Each linkElement As HtmlElement In loadedDocument.Links
            ' Disabled in the original: add the link target to the list box.
            'lstLinks.Items.Add(linkElement.GetAttribute("HREF").ToString())
        Next
    Catch ex As Exception
        MessageBox.Show(ex.Message.ToString(), "Info")
    End Try
End Sub

我编写了新的代码,改用 HTTP 请求来搜索链接。获取链接不是问题,问题出在重定向上。奇怪的是……当我把链接复制到浏览器中时,会显示正确的页面。那么为什么在我的请求中却不起作用呢?我尝试过限制重定向次数,但当重定向次数超过限制时,它会返回错误。

''' <summary>
''' Crawls Indeed result pages, starting at the URL held in <c>urlNavigate</c>.
''' For each result page it:
'''   1. collects the job links (anchors whose onclick contains the
'''      "setRefineByCookie([&#039;radius" marker) into <c>StrLinkArray</c>;
'''   2. collects all anchors with an href into <c>StrUrlVolgende</c> and
'''      filters them on "start=" plus the next page offset to pick the next
'''      result page;
'''   3. downloads every collected job link.
''' The loop ends when no next page URL is found (<c>urlNavigate</c> becomes "")
''' or when the result container "resultsCol" is missing from the page.
''' </summary>
''' <remarks>
''' Failures while downloading an individual job link are shown in a message
''' box and do not stop the crawl. Failures while downloading a result page
''' propagate to the caller.
''' </remarks>
Public Sub ParsingIndeed()
    ' The original literal read "compat ble"; this is the intended IE9 user agent.
    Const BrowserUserAgent As String = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"

    ' One cookie jar shared by every request. Without a CookieContainer the
    ' request drops cookies set during a redirect chain, which can make the
    ' server redirect again and again until the redirect limit is exceeded.
    Dim cookies As New CookieContainer()
    Dim IntNextpage As Integer = 0

    Do While urlNavigate <> ""
        Dim document As HtmlAgilityPack.HtmlDocument = LoadHtmlPage(urlNavigate, BrowserUserAgent, cookies)

        ' id="resultsCol" is the element looked up on indeed.com.
        Dim node As HtmlNode = document.GetElementbyId("resultsCol")
        If node Is Nothing Then
            ' Not a result page (unexpected layout or an error page): stop crawling
            ' instead of failing with a NullReferenceException.
            urlNavigate = ""
            Exit Do
        End If

        Dim TList As New ArrayList

        ' NOTE(review): an XPath starting with "//" searches the whole document,
        ' not only the descendants of node. Kept as in the original; use ".//a[...]"
        ' if only links inside resultsCol are wanted.
        Dim clickableAnchors As HtmlNodeCollection = node.SelectNodes("//a[@onclick]") '("//a[@Data-tn-element]")
        If clickableAnchors IsNot Nothing Then ' SelectNodes yields Nothing when nothing matches
            For Each noteA As HtmlNode In clickableAnchors
                Dim findstring As String = noteA.Attributes("onclick").Value

                If InStr(findstring, "setRefineByCookie([&#039;radius") <> 0 Then
                    ' An anchor selected on @onclick is not guaranteed to carry an href.
                    Dim hrefAttribute As HtmlAttribute = noteA.Attributes("href")
                    If hrefAttribute IsNot Nothing AndAlso Not String.IsNullOrEmpty(hrefAttribute.Value) Then
                        TList.Add("http://be.indeed.com" & hrefAttribute.Value)
                    End If
                End If
            Next
        End If
        ReDim Preserve StrLinkArray(0)
        ConvertArraylistToString(TList, StrLinkArray)
        ArraySplitOnSpacesAndFiterEmpty(StrLinkArray)
        RemoveDuplicatesArray(StrLinkArray) ' these are all the job links that were found

        ' TList is deliberately not cleared (same as the original): the candidate
        ' list for the next page also contains the job links gathered above and is
        ' narrowed down by the "start=" filter below.
        Dim allAnchors As HtmlNodeCollection = node.SelectNodes("//a[@href]")
        If allAnchors IsNot Nothing Then
            For Each noteA As HtmlNode In allAnchors
                Dim href As String = noteA.Attributes("href").Value

                If Not String.IsNullOrEmpty(href) Then TList.Add("http://be.indeed.com" & href)
            Next
        End If
        ReDim Preserve StrUrlVolgende(0)
        ConvertArraylistToString(TList, StrUrlVolgende)
        ArraySplitOnSpacesAndFiterEmpty(StrUrlVolgende)
        RemoveDuplicatesArray(StrUrlVolgende)

        ' The filter text is "start=" followed by the offset, which grows by 10 per page.
        IntNextpage = IntNextpage + 10
        FilterUsefullStringsArray(StrUrlVolgende, "start=" & IntNextpage)

        If StrUrlVolgende.Length <> 0 Then
            urlNavigate = StrUrlVolgende(0)
        Else
            urlNavigate = ""
        End If

        For intteller As Integer = 0 To UBound(StrLinkArray)
            Try
                ' The downloaded job page is parsed but not processed any further here.
                document = LoadHtmlPage(StrLinkArray(intteller).ToString, BrowserUserAgent, cookies)
            Catch ex As Exception
                MessageBox.Show(ex.Message.ToString(), "", MessageBoxButtons.OK)
            End Try
        Next
    Loop
End Sub

''' <summary>
''' Downloads <paramref name="url"/> with a single HTTP request and parses the
''' response body into an HtmlAgilityPack document. The response and its stream
''' are disposed before returning so the underlying connection is released.
''' </summary>
''' <param name="url">Absolute address of the page to download.</param>
''' <param name="userAgent">Value sent in the User-Agent header.</param>
''' <param name="cookies">Cookie jar used for this request and its redirects.</param>
''' <returns>The parsed HTML document.</returns>
Private Function LoadHtmlPage(url As String, userAgent As String, cookies As CookieContainer) As HtmlAgilityPack.HtmlDocument
    Dim request As HttpWebRequest = CType(WebRequest.Create(url), HttpWebRequest)
    request.UserAgent = userAgent
    request.CookieContainer = cookies

    Dim page As New HtmlAgilityPack.HtmlDocument
    Using response As HttpWebResponse = CType(request.GetResponse(), HttpWebResponse)
        Using body As Stream = response.GetResponseStream()
            page.Load(body, True)
        End Using
    End Using

    Return page
End Function

标签: windows vb.net

解决方案


推荐阅读