Wednesday, 31 January 2018

VBA - Shell - Find - Command Line - Recursively find in files

Summary: Find, command line, searches for a specific string of text in a file or files. However, Find is limited to one folder, if you want to scan a folder structure recursively you'll need this VBA program.

So, I had a need to scan a heap of C++ source files looking for some relevant code sample. The command line program Find is very useful but generates too much output that needs editing, also it only runs on one folder at a time, so we need to write some code to loop through a list of folders. The list of folders itself is the result of shelling to the command line so there is some code ReadDirList_FoldersOnly() to parse a directory listing.

Also there is a file filters feature where one specifies a list of acceptable file extensions, the code given filters for *.cpp and *,.h files only because it is searching for C++ source files.

The code can take some time to run so I've added a status bar percentage progress counter. Because of disk activity there is little point trying to add multi-tasking or multi-threading.

The code is given below but here is some sample output

---------- C:\PROGRA~2\MICROS~4\2017\COMMUN~1\VC\TOOLS\MSVC\1412~1.258\ATLMFC\INCLUDE\AFXDOCOB.H
    BEGIN_INTERFACE_PART(OleDocument, IOleDocument)
    BEGIN_INTERFACE_PART(OleDocumentView, IOleDocumentView)
---------- C:\PROGRA~2\MICROS~4\2017\COMMUN~1\VC\TOOLS\MSVC\1412~1.258\ATLMFC\INCLUDE\AFXOLE.H
    BEGIN_INTERFACE_PART(OleDocumentSite, IOleDocumentSite)

Option Explicit
Option Private Module

Sub TestShellRecursiveDirectory2()
    Application.StatusBar = False
    Dim dicFolders As Scripting.Dictionary
    Set dicFolders = New Scripting.Dictionary
    'ShellRecursiveDirectory "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\", dicFolders
    ShellRecursiveDirectory "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.12.25827\atlmfc\include", dicFolders



    'Debug.Assert dicFolders.Count = 6467

    Dim sFindText As String
    sFindText = "IOLEDocument"

    Dim sTopFolder As String
    'sTopFolder = dicFolders.Keys(0)
    'dicFolders.RemoveAll
    'dicFolders.Add sTopFolder, 0

    Dim dicFindings As Scripting.Dictionary
    Set dicFindings = New Scripting.Dictionary

    RunFindInFolders dicFolders, sFindText, dicFindings, Array("cpp", "h")

    'Debug.Print Join(dicFindings.Items, vbNewLine)

    Call CreateObject("Scripting.FileSystemObject").CreateTextFile("n:\foo.txt").Write(Join(dicFindings.Items, vbNewLine))
    Application.StatusBar = False
End Sub


Sub RunFindInFolders(ByVal dicFolders As Scripting.Dictionary, ByVal sFindText As String, _
                                ByRef pdicFindings As Scripting.Dictionary, ByVal vFileFilters As Variant)

    If pdicFindings Is Nothing Then Set pdicFindings = New Scripting.Dictionary

    Dim lCounter As Long: lCounter = 0
    Dim lTotal As Long: lTotal = dicFolders.Count

    Dim vFolderLoop As Variant
    For Each vFolderLoop In dicFolders.Keys
        lCounter = lCounter + 1
        Dim lPercent As Long
        lPercent = CLng((lCounter / lTotal) * 100)

        If lPercent Mod 5 = 0 Then
            Application.StatusBar = "Complete " & lPercent & "%"

        End If

        DoEvents
        RunFindInFolder vFolderLoop, sFindText, pdicFindings, vFileFilters

    Next vFolderLoop

End Sub

Sub TestRunFindInFolder()
    Dim dicFindings As Scripting.Dictionary
    RunFindInFolder "C:\Users\Simon\DOWNLO~1\RUBBER~1\Rubberduck-next", "ole", dicFindings, Empty

    Debug.Print VBA.Join(dicFindings.Items, vbNewLine)
End Sub

Sub RunFindInFolder(ByVal sFolder As String, ByVal sFindText As String, _
                    ByRef pdicFindings As Scripting.Dictionary, ByVal vFileFilters As Variant)
    Static fso As New Scripting.FileSystemObject
    If pdicFindings Is Nothing Then Set pdicFindings = New Scripting.Dictionary

    If fso.FolderExists(sFolder) Then

        Dim sFullFileFilter As String
        sFullFileFilter = fso.BuildPath(sFolder, "*.*")
        '#sFullFileFilter = fso.BuildPath(sFolder, sFileFilter)



        Dim sTempFile As String, sFullTempFile As String
        TempFile sFullFileFilter, sTempFile, sFullTempFile, ".txt"

        If fso.FileExists(sFullTempFile) Then fso.DeleteFile sFullTempFile
        Debug.Assert Not fso.FileExists(sFullTempFile)


        Dim sCmd As String
        sCmd = Environ$("comspec") & " /C Find """ & sFindText & """ " & sFullFileFilter & " /I > " & sFullTempFile
        'find "Ole" *.*  /I > %TEMP%\ole_find.txt

        Dim oWshShell As IWshRuntimeLibrary.WshShell
        Set oWshShell = New IWshRuntimeLibrary.WshShell

        Dim lProc As Long
        lProc = oWshShell.Run(sCmd, 0, True)

        Debug.Assert fso.FileExists(sFullTempFile)

        Dim dicLines As Scripting.Dictionary
        Set dicLines = New Scripting.Dictionary


        Dim txtIn As Scripting.TextStream
        Set txtIn = fso.OpenTextFile(sFullTempFile)

        Do While Not txtIn.AtEndOfStream
            DoEvents
            Dim sLine As String
            sLine = txtIn.ReadLine

            dicLines.Add dicLines.Count, sLine


        Loop
        txtIn.Close
        Set txtIn = Nothing

        Call ReadFindingsFile(sFolder, dicLines, pdicFindings, vFileFilters)

    End If

End Sub

Sub ReadFindingsFile(ByVal sFolder As String, ByVal dicLines As Scripting.Dictionary, _
                ByRef pdicFindings As Scripting.Dictionary, ByVal vFileFilters As Variant)
    Static fso As New Scripting.FileSystemObject


    If pdicFindings Is Nothing Then Set pdicFindings = New Scripting.Dictionary

    'Dim vSplit 'As String
    'vSplit = Split(sFindings, vbNewLine)

    Dim sPrefix As String
    sPrefix = "---------- " & sFolder

    Dim dicTemp As Scripting.Dictionary
    Set dicTemp = New Scripting.Dictionary

    Dim vLoop As Variant
    For Each vLoop In dicLines.Items
        DoEvents
        
            Dim sCurrentFile As String
        
        '* do we have a file header, if so capture the filename
        '* and check if it has the right file extension
        If StartsWith(sPrefix, vLoop) Then
            Dim sNextFile As String
            sNextFile = Trim(Mid(vLoop, Len("---------- "), Len(vLoop)))
            
            Dim bCaptureOutput As Boolean
            bCaptureOutput = FileExtensionMatch(sNextFile, vFileFilters)
            
            '* copy over anything
            MoveOverItemsWithHeader sCurrentFile, dicTemp, pdicFindings
            
            sCurrentFile = sNextFile
        Else
            '* it's not a header line it is an output line
            If bCaptureOutput Then
                If Len(Trim(vLoop)) > 0 Then
                    dicTemp.Add dicTemp.Count, vLoop
                End If
            End If
        
        End If
        
        
        
'        If StartsWith(sPrefix, vLoop) Then
'
'            Dim sFile As String
'
'            Dim sNextFile As String
'            sNextFile = Trim(Mid(vLoop, Len("---------- "), Len(vLoop)))
'
'
'
'            Dim sFileName As String
'            sFileName = Trim(Mid(vLoop, Len("---------- "), Len(vLoop)))
'
'            'Debug.Assert fso.FileExists(sFileName)
'
'
'
'            If FileExtensionMatch(sFileName, vFileFilters) Then 'UCase$(fil.Name) Like UCase$(sFileFilter)
'                'If Len(sFile) > 0 Then
'                    MoveOverItemsWithHeader sFile, dicTemp, pdicFindings
'                'End If
'            Else
'                dicTemp.RemoveAll
'            End If
'
'            sFile = sNextFile
'
'        Else
'            If Len(Trim(vLoop)) > 0 Then
'                dicTemp.Add dicTemp.Count, vLoop
'
'            End If
'
'        End If
    Next vLoop
    'Stop

    '* copy over anything
    MoveOverItemsWithHeader sCurrentFile, dicTemp, pdicFindings


'    If FileExtensionMatch(sFileName, vFileFilters) Then
'        MoveOverItemsWithHeader sFile, dicTemp, pdicFindings
'    Else
'        dicTemp.RemoveAll
'    End If


    Debug.Assert dicTemp.Count = 0

End Sub

Private Function FileExtensionMatch(ByVal sFileName As String, _
                            ByVal vFileFilters As Variant) As Boolean
    Static fso As New Scripting.FileSystemObject

    If Len(sFileName) > 0 Then

        If Not fso.FileExists(sFileName) Then
            FileExtensionMatch = False
        Else

            If IsEmpty(vFileFilters) Then
                FileExtensionMatch = True
            Else

                Debug.Assert fso.FileExists(sFileName)

                Dim fil As Scripting.File
                Set fil = fso.GetFile(sFileName)

                Dim vSplitFileName As Variant
                vSplitFileName = Split(fil.Name, ".")

                Dim sFileExt As String
                sFileExt = vSplitFileName(UBound(vSplitFileName))

                Dim bFilterMatch As Boolean
                FileExtensionMatch = (VBA.InStr(1, _
                            "|" & Join(vFileFilters, "|") & "|", _
                            "|" & sFileExt & "|", _
                            vbTextCompare) > 0)

            End If
        End If
    End If

End Function

Private Function MoveOverItemsWithHeader(ByVal sFile As String, ByVal dicFrom As Scripting.Dictionary, _
            ByVal dicTo As Scripting.Dictionary)

    If Len(sFile) > 0 Then
        If dicFrom.Count > 0 Then
            Debug.Print "---------- " & sFile
            dicTo.Add dicTo.Count, "---------- " & sFile
    
            Dim vCopyLoop As Variant
            For Each vCopyLoop In dicFrom.Items
                dicTo.Add dicTo.Count, vCopyLoop
                Debug.Print vCopyLoop
            Next
    
            dicFrom.RemoveAll
        End If
    End If

End Function


'Private Sub TestStartsWith()
'    Debug.Assert StartsWith("Hell", "Hello world")
'    Debug.Assert Not StartsWith("Hell", "He")
'    Debug.Assert Not StartsWith("Hell", "Foob")
'End Sub

Function StartsWith(ByVal sPrefix As String, ByVal sTest As String) As Boolean
    If LenB(sPrefix) <= LenB(sTest) Then
        StartsWith = (VBA.StrComp(sPrefix, Left$(sTest, Len(sPrefix)), vbTextCompare) = 0)
    End If
End Function

Function TempFile(ByVal sURL As String, ByRef psTempFile As String, ByRef psFullTempFile As String, _
                                                            Optional ByVal sSuffix As String = ".txt")


    Static dict As New Scripting.Dictionary
    Static fso As New Scripting.FileSystemObject

    Dim lProcessUniqueId As Long
    lProcessUniqueId = UniqueProcessId

    Dim sTempFile As String
    psTempFile = CStr(lProcessUniqueId) & CStr(dict.HashVal(sURL)) & sSuffix

    Dim sFullTempFile As String
    psFullTempFile = fso.BuildPath(Environ$("TEMP"), psTempFile)


End Function

Function UniqueProcessId() As Long
    On Error Resume Next
    UniqueProcessId = CallByName(Application, "Hwnd", VbGet)
End Function

Sub ShellRecursiveDirectory(ByVal sStartFolder As String, ByRef pdicFolders As Scripting.Dictionary)

    Static fso As New Scripting.FileSystemObject


    If Not fso.FolderExists(sStartFolder) Then
        Err.Raise vbObjectError, , "#Folder sStartFolder '" & sStartFolder & "' does not exist!"
    Else
        Dim fld As Scripting.Folder
        Set fld = fso.GetFolder(sStartFolder)

        Dim oWshShell As IWshRuntimeLibrary.WshShell
        Set oWshShell = New IWshRuntimeLibrary.WshShell



        Dim sTempFile As String, sFullTempFile As String
        TempFile sStartFolder, sTempFile, sFullTempFile, ".txt"

        If fso.FileExists(sFullTempFile) Then fso.DeleteFile sFullTempFile
        Debug.Assert Not fso.FileExists(sFullTempFile)

        Dim sCmdSpec As String
        sCmdSpec = Environ("comspec") & " /C " '* /C is required to run remainder of command

        Dim sCmdLine As String
        sCmdLine = sCmdSpec & " dir " & fld.ShortPath & "\*.* /s  > " & sFullTempFile


        Dim lProc As Long
        lProc = oWshShell.Run(sCmdLine, 0, True)

        Debug.Assert fso.FileExists(sFullTempFile)

        Dim dic As Scripting.Dictionary

        ReadDirList_FoldersOnly sFullTempFile, pdicFolders
        'Stop
    End If


End Sub


Function ReadDirList_FoldersOnly(ByVal sPipedOutputPath As String, ByRef pdicFolders As Scripting.Dictionary)

    Static fso As New Scripting.FileSystemObject
    If fso.FileExists(sPipedOutputPath) Then

        Dim txtIn As Scripting.TextStream
        Set txtIn = fso.OpenTextFile(sPipedOutputPath)

        Set pdicFolders = New Scripting.Dictionary

        Dim sLine As String
        sLine = txtIn.ReadLine
        While Not txtIn.AtEndOfStream
            DoEvents

            Dim sDirectory As String
            Dim bIsDirectoryHeader As Boolean
            bIsDirectoryHeader = IsDirectoryHeader(sLine, sDirectory)

            If IsTrailerLine(sLine) Then

                If Not pdicFolders.Exists(sDirectory) Then pdicFolders.Add sDirectory, 0
            End If

            sLine = txtIn.ReadLine
        Wend

        txtIn.Close
        Set txtIn = Nothing
    End If

End Function

Function IsTrailerLine(ByVal sLine As String) As Boolean
    If (VBA.InStr(1, sLine, "File(s)", vbTextCompare) > 0) Then
        IsTrailerLine = True
    End If
End Function

Function IsDirectoryHeader(ByVal sLine As String, ByRef psDirectory As String) As Boolean
    If VBA.InStr(1, sLine, " Directory of ", vbTextCompare) > 0 Then
        psDirectory = Trim(Mid$(sLine, 15))
        IsDirectoryHeader = True
    End If
End Function


No comments:

Post a Comment