Thursday, 4 January 2018

IE VBA WebScraping - interacting with a search engine

So another SO question about webscraping. I pushed my link of course. This time we interactive with a search engine, and we need a live copy of Internet Explorer to drive the interaction. Here is my code based on OP's code (but bugfixed by me)


Option Explicit


'See this http://exceldevelopmentplatform.blogspot.co.uk/2018/01/vba-mshtml-webscraping-looking-for-new.html

Sub SearchBot()

    'dimension (declare or set aside memory for) our variables
    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an  (link) element
    Dim y As Integer 'integer variable we'll use as a counter
    Dim result As String 'string variable that will hold our result link

    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://www.boersen-zeitung.de/index.php?li=310&subm=suche"
    'objIE.navigate "https://duckduckgo.com"

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    Dim vCarManufacturer As Variant
    vCarManufacturer = Sheets("Sheet1").Range("A2").value

    vCarManufacturer = "Daimler" 'overriden

    Dim vSearchURL As Variant
    vSearchURL = Sheets("Sheet1").Range("C1").value
    vSearchURL = "" 'overriden

    Debug.Assert Not objIE.document Is Nothing

    Dim htmlSuche As Object
    Set htmlSuche = objIE.document.querySelector("input.suche_unternehmen")
    Debug.Assert Not htmlSuche Is Nothing

    'in the search box put cell "A2" value, the word "in" and cell "C1" value
    htmlSuche.value = _
      vCarManufacturer '& " in " & vSearchURL

    'click the 'go' button
    'objIE.document.getElementById("search_button_homepage").Click
    objIE.document.querySelector("input.suche_button21").Click

    'wait again for the browser
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'the first search result will go in row 2
    y = 2

    'for each <a> element in the collection of objects with class of 'result__a'...

    Dim objResults As Object
    Set objResults = objIE.document.querySelectorAll("a.ue_fl_l")

    For Each aEle In objResults ' objIE.document.querysselec("result__a")

        '...get the href link and print it to the sheet in col C, row y
        Dim anchorResult As MSHTML.IHTMLAnchorElement
        Set anchorResult = aEle

        result = aEle
        Sheets("Sheet1").Range("C" & y).value = anchorResult.href ' result

        '...get the text within the element and print it to the sheet in col D
        Sheets("Sheet1").Range("D" & y).value = anchorResult.innerText
        Debug.Print aEle.innerText

        'is it a yellowpages link?
        If InStr(result, "yellowpages.com") > 0 Or InStr(result, "yp.com") > 0 Then
            'make the result red
            Sheets("Sheet1").Range("C" & y).Interior.ColorIndex = 3
            'place a 1 to the left
            Sheets("Sheet1").Range("B" & y).value = 1
        End If

        'increment our row counter, so the next result goes below
        y = y + 1

    'repeat times the # of ele's we have in the collection
    Next

    'add up the yellowpages listings
    Sheets("Sheet1").Range("B1").value = _
      Application.WorksheetFunction.Sum(Sheets("Sheet1").Range("B2:B100"))

    'close the browser
    objIE.Quit


'exit our SearchBot subroutine
End Sub

No comments:

Post a Comment