  by Caligari
  with mods by Chuck

This tool can be used to find which files in a directory tree contain any
of a list of words.

The list of words to search for is provided by default in a file called
"wordlist.txt" (located in the same directory as the executable) with one
regular expression per line.

A list of filename extensions to ignore can also be provided, if needed.
That list has one extension per line (not including the "."), and is
not a regular expression.

A description of further options is available using the "-help" command-line argument.

Comments in the code explain the use of regular expressions.

use System.Text.RegularExpressions

class FindWords

    # Path of the word-list file (one regular expression per line).
    var wordsFilename = ''
    # Path of the optional ignore-list file; empty means no ignore list.
    var ignoreFilename = ''
    # Root directory of the search.
    var searchDirectory = ''
    # When true, only file and directory names are checked, not contents.
    var onlyNames as bool
    # When true, matches are reported per line; otherwise once per file.
    var showLines as bool
    # Pattern text from the word list mapped to its Regex object.
    var words = SortedDictionary<of String, Regex>()
    # Extensions to skip, each stored with its leading ".".
    var ignoreExtensions = List<of String>()
    # DirectoryInfo for searchDirectory; assigned in main.
    var startDir as DirectoryInfo?
    # FileInfo for wordsFilename; assigned in main and used to skip the
    # word-list file itself during the scan.
    var listFile as FileInfo?

    def main
        """
        Program entry point: set the defaults, apply the command-line
        options, load the word and ignore lists, then scan the directory tree.
        """
        .wordsFilename = Path.combine(Path.getDirectoryName(CobraCore.exePath), "wordlist.txt")
        .ignoreFilename = ""
        .searchDirectory = Path.getFullPath(".")
        .onlyNames = false
        .showLines = true

        # The command line may override any of the defaults above, so it has
        # to be processed before the lists are read and before the FileInfo /
        # DirectoryInfo objects below are created from the final paths.
        .processCommandLine
        .readRegExList
        .readExtToIgnore

        .listFile = FileInfo(.wordsFilename)
        .startDir = DirectoryInfo(.searchDirectory)
        .checkDirectory(.startDir to !)
        print "\nDone."

    def showSyntax
        """
        Print the command-line usage summary to standard output.
        """
        # Let Path strip the directory and the extension: it copes with a
        # separator at index 0 and with either separator style, which the
        # hand-written lastIndexOf checks did not.
        appName = Path.getFileNameWithoutExtension(CobraCore.commandLineArgs[0])
        # The switches are spelled exactly as processCommandLine accepts them.
        print "Syntax:\n[appName] \[searchDirectory\] \[-l|-list listFilename\] \[-i|-ignore ignorelistFilename\] \[-n|-names\] \[-s|-summary\] \[-h|-help\]"
        print "  -l|-list file     filename of the list of words to search for"
        print "  -i|-ignore file   filename of an optional list of extensions for files to ignore"
        print "  -n|-names         only check file and directory names"
        print "  -s|-summary       only show files, not lines within files"
        print "  -h|-help          show this help text"

    def processCommandLine
        """
        Parse the command-line arguments into the option fields.

        Recognized arguments: an optional search directory, -l|-list FILE,
        -i|-ignore FILE, -n|-name|-names, -s|-summary and -h|-help.
        A help switch prints the usage text and exits with status 0.
        Any argument error prints the usage text and exits with status 1.
        """
        args = CobraCore.commandLineArgs

        listArgs    = ["-l", "-list"]
        ignoreArgs  = ["-i", "-ignore"]
        nameArgs    = ["-n", "-name", "-names"]
        summaryArgs = ["-s", "-summary"]
        helpArgs    = ["-h", "-help"]
        allArgs     = listArgs.concated(ignoreArgs).concated(nameArgs).concated(summaryArgs).concated(helpArgs)
        lookingForIgnore = false
        lookingForList = false
        foundDir = false
        argError = false
        if args.count > 1
            for arg in args[1:]
                # A known switch arriving while a filename is still expected
                # means the preceding -l / -i was given without a value.
                if arg in allArgs and lookingForList
                    print "unable to determine filename for list of words, expected '-l filename'"
                    lookingForList = false
                    argError = true
                if arg in allArgs and lookingForIgnore
                    print "unable to determine filename for list of ignore extensions, expected '-i filename'"
                    lookingForIgnore = false
                    argError = true
                if arg in helpArgs
                    .showSyntax
                    Environment.exit(0)
                else if arg in nameArgs
                    .onlyNames = true
                else if arg in summaryArgs
                    .showLines = false
                else if arg in ignoreArgs
                    lookingForIgnore = true
                else if arg in listArgs
                    lookingForList = true
                else if arg.startsWith("-")
                    # startsWith is a string comparison and is safe on an
                    # empty argument, unlike indexing arg[0].
                    print "unrecognized argument [arg]"
                    argError = true
                else
                    # A plain value: the pending filename if a switch is
                    # waiting for one, otherwise the search directory.
                    if lookingForIgnore
                        .ignoreFilename = arg
                        lookingForIgnore = false
                    else if lookingForList
                        .wordsFilename = arg
                        lookingForList = false
                    else if not foundDir  # must be the directory to look in
                        .searchDirectory = arg
                        foundDir = true
                    else  # already found directory
                        print "more than one directory provided on commandline, expected 'directory'"
                        argError = true

        # A -l / -i given as the very last argument never received its value.
        if lookingForList
            print "unable to determine filename for list of words, expected '-l filename'"
            argError = true
        if lookingForIgnore
            print "unable to determine filename for list of ignore extensions, expected '-i filename'"
            argError = true

        if argError
            .showSyntax
            Environment.exit(1)

    def readRegExList
        """
        Load the word list named by wordsFilename into the words dictionary,
        one regular expression per non-empty line, then print how many
        search words were found. An IOException is reported via reportError.
        """
        try
            # `using` closes the reader when the block ends, even on error.
            using wordsFile = StreamReader(.wordsFilename)
                wordLine = wordsFile.readLine
                while wordLine
                    # We can tell if there is a word on a line using an
                    # inline regular expression:
                    #   Regex.match(textToSearch, regularExpression)
                    # The textToSearch can be a hard-coded string,
                    # but most often it will be a String.
                    # The regularExpression is a string (hard-coded
                    # or not) that follows the regular expression
                    # syntax.
                    wordMatch = Regex.match(wordLine, r".+")
                    # The return from a match() call is a Match class
                    # object. The success property tells us whether the
                    # search located what we were looking for.
                    # The toString method gives us the resulting subString.
                    if wordMatch.success
                        # We can create a Regex object which can later be
                        # used to execute a search with a given regular
                        # expression. In this case we are setting two
                        # options (ignoring case in the search we are
                        # creating, and pre-compiling the search, as we
                        # may be using it many times).
                        # Note that here we are saving the resulting
                        # Regex object to a Dictionary for later use.
                        # We could just as easily save the string itself,
                        # but this way we only pay the Regex creation cost
                        # once, rather than every time we carry out the
                        # search.
                        .words[wordMatch.toString] = Regex(wordMatch.toString, RegexOptions(Compiled, IgnoreCase))
                    wordLine = wordsFile.readLine
        catch ioe as IOException
            .reportError(.wordsFilename, ioe)
        print "\nFound [.words.count] search words.\n"

    def readExtToIgnore
        """
        Load the optional ignore list named by ignoreFilename into
        ignoreExtensions, storing each entry with a leading ".", then print
        how many were found. Does nothing when no ignore file was given.
        An IOException is reported via reportError.
        """
        # optionally find extensions to ignore
        if .ignoreFilename.length > 0
            try
                # `using` closes the reader when the block ends, even on error.
                using ignoreFile = StreamReader(.ignoreFilename)
                    ignoreLine = ignoreFile.readLine
                    while ignoreLine
                        # Trim before testing the length so that a line of
                        # only whitespace does not add a bare "." entry.
                        ext = ignoreLine.trim
                        if ext.length > 0
                            .ignoreExtensions.add("." + ext)
                        ignoreLine = ignoreFile.readLine
            catch ioe as IOException
                .reportError(.ignoreFilename, ioe)
            print "\nFound [.ignoreExtensions.count] ignored extensions.\n"

    def checkDirectory(curDir as DirectoryInfo)
        """
        Report whether the name of curDir matches any search word, check each
        file in it with checkFile, then recurse into its subdirectories.
        The word-list file itself and files whose extension is in
        ignoreExtensions are skipped.
        """
        if not curDir.exists
            print "Unable to find search directory [curDir.fullName]"
            # Nothing to enumerate in a directory that does not exist.
            return
        # print "Checking [curDir.fullName]..."
        foundWords = List<of String>()

        for word, wordMatch in .words
            # check directory name
            # Here we use a regular expression match
            # to determine whether the directory names have
            # any of the words we are looking for.
            # Each directory name is passed to the match
            # method call made on each of the Regex objects
            # in the dictionary we created earlier.
            # Only the last path component is tested; the parent
            # directories are tested by their own checkDirectory call.
            dirNameCheck = wordMatch.match(curDir.name)

            if dirNameCheck.success
                foundWords.add(word)
        if foundWords.count
            print "[curDir.fullName]: directory name may have [foundWords]"
        # check files
        for subFile in curDir.getFiles
            # Note that here we check if the file we are about
            # to look at has a name which matches our list of words
            # or an extension we are ignoring. Neither of these
            # tests use regular expressions, although we could
            # use the same scheme for the ignore extensions, if
            # we wanted to support regular expressions there as well.
            if subFile.fullName == .listFile.fullName or subFile.extension in .ignoreExtensions
                continue
            .checkFile(subFile, curDir)

        for subDir in curDir.getDirectories
            .checkDirectory(subDir)

    def checkFile(curFile as FileInfo, curDir as DirectoryInfo)
        """
        Report the search words found in the name of curFile and, unless
        onlyNames is set, in its contents. With showLines set the matches are
        reported per line with a line number; otherwise the whole file is
        read and reported once. An IOException is reported via reportError.
        The curDir parameter is not used by this method.
        """
        if not curFile.exists
            print "Unable to find file [curFile.fullName]"
            return
        # check filename
        foundWords = List<of String>()

        for word, wordMatch in .words
            # Here we use a regular expression match
            # to determine whether the filename has
            # any of the words we are looking for.
            # The name is passed to the match method
            # call made on each of the Regex objects
            # in the dictionary we created earlier.
            fileNameCheck = wordMatch.match(curFile.name)

            if fileNameCheck.success
                foundWords.add(word)
        if foundWords.count
            print "[curFile.fullName]: file name may have [foundWords]"
        if .onlyNames
            return
        # check contents
        try
            # `using` closes the reader on every path out of the block, so
            # file handles are not leaked while walking a large tree.
            using openFile = StreamReader(curFile.fullName)
                if .showLines
                    lineNum = 1
                    curLine = openFile.readLine
                    while curLine
                        foundWords = List<of String>()
                        for word, wordMatch in .words
                            # Finally, we use a regular expression match
                            # on the contents of each file for each
                            # of the words we want to find. We do this
                            # check for each line in the file, so that
                            # we can report the findings by line, if we
                            # want to do so.
                            curLineCheck = wordMatch.match(curLine)
                            if curLineCheck.success
                                foundWords.add(word)
                        if foundWords.count
                            print "[curFile.fullName]([lineNum]) has [foundWords]"
                        curLine = openFile.readLine
                        lineNum += 1
                else  # not show lines
                    # When we don't need to report each line in a file
                    # we read the entire contents into a "line" and
                    # run the same regular expression match on it.
                    contents = openFile.readToEnd
                    foundWords = List<of String>()
                    for word, wordMatch in .words
                        contentsCheck = wordMatch.match(contents)
                        if contentsCheck.success
                            foundWords.add(word)
                    if foundWords.count
                        print "[curFile.fullName] has [foundWords]"
        catch ioe as IOException
            .reportError(curFile.fullName, ioe)


    def reportError(fileName, exc as Exception)
        """
        Print a three-line report for a failed read: the file involved,
        the message carried by the exception, and a hint about -h.
        """
        heading = 'Error reading "[fileName]":'
        detail = exc.message
        print heading
        print detail
        print 'Run with -h for options.'