So another SO question about webscraping. I pushed my link of course. This time we interactive with a search engine, and we need a live copy of Internet Explorer to drive the interaction. Here is my code based on OP's code (but bugfixed by me)
Option Explicit
'See this http://exceldevelopmentplatform.blogspot.co.uk/2018/01/vba-mshtml-webscraping-looking-for-new.html
Sub SearchBot()
'dimension (declare or set aside memory for) our variables
Dim objIE As InternetExplorer 'special object variable representing the IE browser
Dim aEle As HTMLLinkElement 'special object variable for an (link) element
Dim y As Integer 'integer variable we'll use as a counter
Dim result As String 'string variable that will hold our result link
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://www.boersen-zeitung.de/index.php?li=310&subm=suche"
'objIE.navigate "https://duckduckgo.com"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
Dim vCarManufacturer As Variant
vCarManufacturer = Sheets("Sheet1").Range("A2").value
vCarManufacturer = "Daimler" 'overriden
Dim vSearchURL As Variant
vSearchURL = Sheets("Sheet1").Range("C1").value
vSearchURL = "" 'overriden
Debug.Assert Not objIE.document Is Nothing
Dim htmlSuche As Object
Set htmlSuche = objIE.document.querySelector("input.suche_unternehmen")
Debug.Assert Not htmlSuche Is Nothing
'in the search box put cell "A2" value, the word "in" and cell "C1" value
htmlSuche.value = _
vCarManufacturer '& " in " & vSearchURL
'click the 'go' button
'objIE.document.getElementById("search_button_homepage").Click
objIE.document.querySelector("input.suche_button21").Click
'wait again for the browser
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'the first search result will go in row 2
y = 2
'for each <a> element in the collection of objects with class of 'result__a'...
Dim objResults As Object
Set objResults = objIE.document.querySelectorAll("a.ue_fl_l")
For Each aEle In objResults ' objIE.document.querysselec("result__a")
'...get the href link and print it to the sheet in col C, row y
Dim anchorResult As MSHTML.IHTMLAnchorElement
Set anchorResult = aEle
result = aEle
Sheets("Sheet1").Range("C" & y).value = anchorResult.href ' result
'...get the text within the element and print it to the sheet in col D
Sheets("Sheet1").Range("D" & y).value = anchorResult.innerText
Debug.Print aEle.innerText
'is it a yellowpages link?
If InStr(result, "yellowpages.com") > 0 Or InStr(result, "yp.com") > 0 Then
'make the result red
Sheets("Sheet1").Range("C" & y).Interior.ColorIndex = 3
'place a 1 to the left
Sheets("Sheet1").Range("B" & y).value = 1
End If
'increment our row counter, so the next result goes below
y = y + 1
'repeat times the # of ele's we have in the collection
Next
'add up the yellowpages listings
Sheets("Sheet1").Range("B1").value = _
Application.WorksheetFunction.Sum(Sheets("Sheet1").Range("B2:B100"))
'close the browser
objIE.Quit
'exit our SearchBot subroutine
End Sub
No comments:
Post a Comment