Friday, 12 October 2018

VBA - Microsoft.ACE.OLEDB.12.0 - Slow Text Files

Summary: Skip using Text Files with Microsoft.ACE.OLEDB.12.0 because it is just too slow.

Previously, I had discovered that the Microsoft.ACE.OLEDB.12.0 handles databases stored in Html files but I found that to be unsatisfactory because of lack of schemas.

Exporting to Text Files outputs a schema. Indeed, an aggregated schema is built if one exports more than one table. So far so good.

Unfortunately, doing a simple query takes half a second and this looks like overhead, not at all acceptable. Connection pooling does not help. I'm guessing that the overhead is re-reading the schema and re-importing the file.

Depositing code below but skipping a commentary because I want to move on the next database technology candidate.

Option Explicit
Option Private Module

'* Tools -> References
'* ADODB  Microsoft ActiveX Data Objects 6.1 Library  C:\Program Files (x86)\Common Files\System\ado\msado15.dll
'* ADOX   Microsoft ADO Ext. 6.0 for DDL and Security C:\Program Files (x86)\Common Files\System\ado\msadox.dll


Private Const msOrdersTextFile As String = "Orders.txt"
Private Const msCustomersTextFile As String = "Customers.txt"


Public Function TextfilesDbFolder() As String
    Static fso As New Scripting.FileSystemObject
    
    Dim fldTemp As Scripting.Folder
    Set fldTemp = fso.GetFolder(Environ$("Temp"))
    
    Dim sFldTextfilesDb As String
    sFldTextfilesDb = fso.BuildPath(fldTemp.Path, "TextfilesDb")
    If fso.FolderExists(sFldTextfilesDb) Then
        TextfilesDbFolder = sFldTextfilesDb
    Else
        fso.CreateFolder sFldTextfilesDb
        TextfilesDbFolder = sFldTextfilesDb
    End If
        
End Function

Private Sub SetUpSomeData()
    '* WARNING this will wipe data!
    Dim sht As Excel.Worksheet
    Set sht = ThisWorkbook.Worksheets.Item("Sheet1")
    sht.Cells.Clear

    '*
    '* use our array literal trick, for more tricks tips and 'blue sky thinking'
    '* see http://exceldevelopmentplatform.blogspot.com
    '*
    Dim vOrders As Variant
    'vOrders = [{"OrderID","CustomerID","OrderDate";420,2,"10-Oct-2018";421,3,"11-Oct-2018";422,1,"12-Oct-2018"}]
    vOrders = [{"OrderID","CustomerID","OrderDate";420,2,"10-Oct-2018";421,3,"11-Oct-2018";422,1,"12-Oct-2018";423,2,"13-Oct-2018"}]
    sht.Range("A1").Resize(UBound(vOrders) - LBound(vOrders) + 1, 3).Value2 = vOrders
    
    Dim vCustomers As Variant
    vCustomers = [{"CustomerID","CustomerName","ContactName","Country";1,"Big Corp","Mandy","USA";2,"Medium Corp","Bob","Canada";3,"Small Corp","Jose","Mexico"}]
    'sht.Range("e1:h4").Value2 = vCustomers
    sht.Range("e1").Resize(UBound(vCustomers) - LBound(vOrders) + 1, 4).Value2 = vCustomers
    

End Sub



Private Sub TestWriteToTestFile()
    Dim sTextfilesDbFolder  As String
    sTextfilesDbFolder = TextfilesDbFolder
    WriteToTextFile ThisWorkbook.Worksheets.Item("Sheet1").Range("A1").CurrentRegion, sTextfilesDbFolder & msOrdersTextFile
    WriteToTextFile ThisWorkbook.Worksheets.Item("Sheet1").Range("e1").CurrentRegion, sTextfilesDbFolder & msCustomersTextFile
    
End Sub

Private Sub WriteToTextFile(ByVal rngTable As Excel.Range, ByVal sTextFile As String)

    Dim sTableAddress As String
    sTableAddress = "[" & rngTable.Worksheet.name & "$" & rngTable.Address(False, False, xlA1) & "] """

    Dim oConnExcel As ADODB.Connection
    Set oConnExcel = New ADODB.Connection

    If UBound(Split(ThisWorkbook.name, ".")) = 0 Then Err.Raise vbObjectError, , "#Workbook needs a file extension, i.e. saved at least once!"

    '*
    '* we're reading from worksheet so we need the Excel engine
    '*
    oConnExcel.Open "Provider=Microsoft.ACE.OLEDB.12.0;" & _
           "Data Source=" & ThisWorkbook.FullName & ";" & _
           "Extended Properties='Excel 12.0 Macro;HDR=YES'"

    Dim sFileNameOnly As String, sFolderOnly As String

    If ParseFileName(sTextFile, sFileNameOnly, sFolderOnly) Then

        If Not Dir(sTextFile) = "" Then Kill sTextFile   '* if it exists then delete before re-exporting

        Dim sCmdText As String
        sCmdText = VBA.Replace("SELECT * INTO [%filename%] in %quotedFolder% ""Text;"" FROM %tableName%", "%filename%", sFileNameOnly)
        sCmdText = VBA.Replace(sCmdText, "%quotedFolder%", """" & sFolderOnly & """")
        sCmdText = VBA.Replace(sCmdText, "%tableName%", sTableAddress)

        '*
        '* Text is specified in the command text, no need for a separate connection
        '*
        Debug.Print sCmdText
        oConnExcel.Execute sCmdText
    End If

End Sub


Private Sub TestReadFromTextFile()
    Dim sTextfilesDbFolder  As String
    sTextfilesDbFolder = TextfilesDbFolder

    Static fso As New Scripting.FileSystemObject
    Debug.Assert fso.FolderExists(sTextfilesDbFolder)

    '*
    '* Reuse the connection (pooled connections encouraged)
    '*
    Dim oConnText As ADODB.Connection
    Set oConnText = New ADODB.Connection
    oConnText.Open "Provider=Microsoft.ACE.OLEDB.12.0;" & _
           "Data Source=" & sTextfilesDbFolder & _
           ";Extended Properties='Text'"

    Dim dtTimeNow As Date
    dtTimeNow = Now()
    Dim lRepeatForTimings As Long
    For lRepeatForTimings = 1 To 10
    'ReadFromTextFile oConnText, "Orders.txt"
    'ReadFromTextFile oConnText, "Customers.txt"
    
    RunJoinQuery oConnText
    
    Next

    Debug.Print (Now() - dtTimeNow) * 86400 '*seconds

    '*
    '* can now close connection
    '*
    oConnText.Close
    Set oConnText = Nothing

End Sub



Private Function ConnectionFactory(ByVal sDbFolder As String) As ADODB.Connection
    Set ConnectionFactory = New ADODB.Connection
    ConnectionFactory.Open "Provider=Microsoft.ACE.OLEDB.12.0;" & _
           "Data Source=" & sDbFolder & _
           ";Extended Properties='Text'"

End Function
    



Private Function RunJoinQuery(ByVal oConnText As ADODB.Connection) As ADODB.Recordset

    Dim rsJoin As ADODB.Recordset
    Set rsJoin = New ADODB.Recordset

    '*
    '* The Left and Instr functions in the SQL are MSAccess functions, see this link for reference
    '* https://www.w3schools.com/sql/sql_ref_msaccess.asp
    '*
    rsJoin.Open "SELECT O.OrderID ,  Left(C.CustomerName,INSTR(C.CustomerName,' ')-1), C.CustomerName, O.OrderDate From [Orders.txt] as O INNER JOIN [Customers.txt] as C ON O.CustomerID=C.CustomerID ORDER BY O.OrderDate; ", oConnText, CursorTypeEnum.adOpenStatic '* can use CursorTypeEnum.adOpenKeyset
    DumpRecordset rsJoin
    Set RunJoinQuery = rsJoin
    'Stop
End Function

Private Sub ReadFromTextFile(ByVal oConnText As ADODB.Connection, ByVal sTableName As String)

    Dim rsText As ADODB.Recordset
    Set rsText = New ADODB.Recordset

    '*
    '* as with open for Excel I needed to use adOpenStatic
    '*
    rsText.Open "SELECT * From [" & sTableName & "]", oConnText, CursorTypeEnum.adOpenStatic '* can use CursorTypeEnum.adOpenKeyset
    Debug.Assert rsText.RecordCount > 0

    '*
    '* do some work with the recordset here
    '*
    DumpRecordset rsText
    'Stop

    Set rsText = Nothing
    'Stop

End Sub



Private Sub TestReadCatalogOfTextFile()
    ReadCatalogOfTextFile TextfilesDbFolder
End Sub

Private Sub ReadCatalogOfTextFile(ByVal sTextFile As String)
    Dim oConnText As ADODB.Connection
    Set oConnText = New ADODB.Connection
    oConnText.Open "Provider=Microsoft.ACE.OLEDB.12.0;" & _
           "Data Source=" & sTextFile & _
           ";Extended Properties='Text'"

    Dim catDB As ADOX.Catalog
    Dim tblList As ADOX.Table

    Set catDB = New ADOX.Catalog
    Set catDB.ActiveConnection = oConnText

    Dim adoxTables As ADOX.Tables
    Set adoxTables = catDB.Tables

    Dim adoxTableLoop As ADOX.Table
    For Each adoxTableLoop In adoxTables
        Debug.Print adoxTableLoop.name
    Next adoxTableLoop

End Sub

Private Sub DumpRecordset(ByVal rs As ADODB.Recordset)
    '*
    '* Some code to iterate over the recordset
    '*
    rs.MoveFirst

    Dim lFieldCount As Long
    lFieldCount = rs.Fields.Count

    While Not rs.EOF

        Dim sOutputLine As String
        sOutputLine = ""

        Dim sFieldAndValue As String
        Dim lFieldLoop As Long
        For lFieldLoop = 0 To lFieldCount - 1
            sFieldAndValue = rs.Fields.Item(lFieldLoop).name & ":" & rs.Fields.Item(lFieldLoop).Value

            sOutputLine = sOutputLine & VBA.IIf(Len(sOutputLine) > 0, vbTab, "") & sFieldAndValue
        Next
        Debug.Print sOutputLine
        rs.MoveNext
    Wend

End Sub

Private Sub UnitTestParseFileName()

    Const sTextFile As String = "N:Colors.txt"
    Dim sFileNameOnly As String, sFolderOnly As String

    Debug.Assert ParseFileName(sTextFile, sFileNameOnly, sFolderOnly)
    Debug.Assert sFileNameOnly = "Colors.txt"
    Debug.Assert sFolderOnly = "N:"

    Debug.Assert ParseFileName("N:folder1folder2Colors.txt", sFileNameOnly, sFolderOnly)
    Debug.Assert sFileNameOnly = "Colors.txt"
    Debug.Assert sFolderOnly = "N:folder1folder2"

End Sub

Private Function ParseFileName(ByVal sFullFileName As String, ByRef psFileNameOnly As String, ByRef psFolderOnly As String) As Boolean

    Dim vSplit As Variant
    vSplit = VBA.Split(sFullFileName, "")

    Dim lUBound As Long
    lUBound = UBound(vSplit)

    If lUBound > 0 Then
        psFileNameOnly = vSplit(lUBound)
        psFolderOnly = Left(sFullFileName, Len(sFullFileName) - Len(psFileNameOnly))
        ParseFileName = True
    End If

End Function

No comments:

Post a Comment