""" FindWords This tool can be used to find which files in a directory tree contain any of a list of words. The list of words to search for is provided by default in a file called "wordlist.txt" (located in the same directory as the executable) with one regular expression per line. A list of filename extensions to ignore can also be provided, if needed. That list has one extension per line (not including the "."), and is not a regular expression. A description of further options is available using the "-help" command-line argument. Comments in the code highlight the use of Regular Expressions. """ use System.Text.RegularExpressions class FindWords var wordsFilename as String is shared var ignoreFilename as String is shared var searchDirectory as String is shared var onlyNames as bool is shared var showLines as bool is shared var words as SortedDictionary is shared var ignoreExtensions as List is shared var startDir as DirectoryInfo is shared var listFile as FileInfo is shared def showSyntax is shared appName = CobraCore.commandLineArgs[0] if appName.lastIndexOf(Path.directorySeparatorChar) > 0 appName = appName[appName.lastIndexOf(Path.directorySeparatorChar)+1:] if appName.lastIndexOf(".") > 0 appName = appName[:appName.lastIndexOf(".")] print "Syntax:\n[appName] \[searchDirectory\] \[-l|list listFilename\] \[-i|ignore ingorelistFilename\] \[-n|-names] \[-s|-summary] \[-h|-help]" print " -l|list file filename of the list of words to search for" print " -i|ignore file filename of an optional list of extensions for files to ignore" print " -n|names only check file and directory names" print " -s|summary only show files, not lines within files" print " -h|help show this help text" CobraCore.exit(0) def processCommandLine is shared args = CobraCore.commandLineArgs listArgs = ["-l", "-list"] ignoreArgs = ["-i", "-ignore"] nameArgs = ["-n", "-name", "-names"] summaryArgs = ["-s", "-summary"] helpArgs = ["-h", "-help"] allArgs = listArgs.concated(ignoreArgs).concated(nameArgs).concated(summaryArgs).concated(helpArgs) lookingForIgnore = false lookingForList = false foundDir = false argError = false if args.count > 1 for arg in args[1:] if (arg in allArgs) and (lookingForList) print "unable to determine filename for list of words, expected '-l filename'" lookingForList = false argError = true if (arg in allArgs) and (lookingForIgnore) print "unable to determine filename for list of ignore extensions, expected '-i filename'" lookingForIgnore = false argError = true if arg in helpArgs .showSyntax else if arg in nameArgs .onlyNames = true else if arg in summaryArgs .showLines = false else if arg in ignoreArgs lookingForIgnore = true else if arg in listArgs lookingForList = true else if arg[0] == "-" print "unrecognized argument [arg]" argError = true else if lookingForIgnore .ignoreFilename = arg.toString lookingForIgnore = false else if lookingForList .wordsFilename = arg.toString lookingForList = false else if not foundDir # must be the directory to look in .searchDirectory = arg.toString foundDir = true else # already found directory print "more than one directory provied on commandline, expected 'directory'" argError = true if argError print .showSyntax def main is shared .wordsFilename = Path.combine(Path.getDirectoryName(CobraCore.exePath), "wordlist.txt") .ignoreFilename = "" .searchDirectory = Path.getFullPath(".") .onlyNames = false .showLines = true .words = SortedDictionary() .ignoreExtensions = List() .processCommandLine # get words to search for - regular expressions try wordsFile = StreamReader(.wordsFilename) catch ioe as IOException print 'I/O Error with [.wordsFilename]: [ioe.message]' return success wordLine = wordsFile.readLine while wordLine # We can tell if there is a word on a line using an # inline regular expression: # Regex.match( textToSearch, regularExpression ) # The textToSearch can be a hard-coded string, # but most often it will be a String. # The regularExpression is a string (hard-coded # or not) that follows the regular expression # syntax. wordMatch = Regex.match(wordLine, r".+") # The return from a match() call is a Match class # object. The success property tells us whether the # search located what we were looking for. # The toString method gives us the resulting subString. if wordMatch.success # We can create a Regex object which can later be # used to execute a search with a given regular # expression. In this case we are setting two # options (ignoring case in the search we are # creating, and pre-compiling the search, as we # may be using it many times). # Note that here we are saving the resulting # Regex object to a Dictionary for later use. # We could just as easily save the string itself, # but this way we only pay the Regex creation cost # once, rather than every time we carry out the # search. .words[wordMatch.toString] = Regex(wordMatch.toString, RegexOptions(Compiled, IgnoreCase)) wordLine = wordsFile.readLine wordsFile.close print "\nFound [.words.count] search words.\n" #optionally find extensions to ignore if .ignoreFilename.length > 0 try ignoreFile = StreamReader(.ignoreFilename) catch ioe as IOException print 'I/O Error with [.ignoreFilename]: [ioe.message]' return success ignoreLine = ignoreFile.readLine while ignoreLine if ignoreLine.length > 0 .ignoreExtensions.add("." + ignoreLine.trim) ignoreLine = ignoreFile.readLine ignoreFile.close print "\nFound [.ignoreExtensions.count] ignored extenstions.\n" .listFile = FileInfo(.wordsFilename) .startDir = DirectoryInfo(.searchDirectory) curDir = .startDir .checkDirectory(curDir) print "\nDone." def checkDirectory(curDir as DirectoryInfo) is shared if not curDir.exists print "Unable to find search directory [curDir.fullName]" return # print "Checking [curDir.fullName]..." foundWords = List() for word, wordMatch in .words # check directory name # Here we use a regular expression match # to determine whether the directory names have # any of the words we are looking for. # Each directory name is passed to the match # method call made on each of the Regex objects # in the dictionary we created earlier. dirNameCheck = wordMatch.match(curDir.name) if dirNameCheck.success foundWords.add(word) if foundWords.count print "[curDir.fullName]: directory name may have [foundWords]" # check files for subFile in curDir.getFiles # Note that here we check if the file we are about # to look at has a name which matches our list of words # or an extension we are ignoring. Neither of these # tests use regular expressions, although we could # use the same scheme for the ignore extensions, if # we wanted to support regular expressions there as well. if (subFile.fullName == .listFile.fullName) or (subFile.extension in .ignoreExtensions) continue .checkFile(subFile, curDir) for subDir in curDir.getDirectories .checkDirectory(subDir) def checkFile(curFile as FileInfo, curDir as DirectoryInfo) is shared if not curFile.exists print "Unable to find file [curFile.fullName]" return # check filename foundWords = List() for word, wordMatch in .words # Here we use a regular expression match # to determine whether the filename has # any of the words we are looking for. # The name is passed to the match method # call made on each of the Regex objects # in the dictionary we created earlier. fileNameCheck = wordMatch.match(curFile.name) if fileNameCheck.success foundWords.add(word) if foundWords.count print "[curFile.fullName]: file name may have [foundWords]" if .onlyNames return # check contents try openFile = StreamReader(curFile.fullName) catch ioe as IOException print " I/O Error with [curFile.fullName]: [ioe.message]" return if .showLines curLine = openFile.readLine lineNum = 1 while curLine foundWords = List() for word, wordMatch in .words # Finally, we use a regular expression match # on the contents of each file for each # of the words we want to find. We do this # check for each line in the file, so that # we can report the findings by line, if we # want to do so. curLineCheck = wordMatch.match(curLine) if curLineCheck.success foundWords.add(word) if foundWords.count print "[curFile.fullName]([lineNum]) may have [foundWords]" curLine = openFile.readLine lineNum += 1 else # not show lines # When we don't need to report each line in a file # we read the entire contents into a "line" and # run the same regular expression match on it. curLine = openFile.readToEnd foundWords = List() for word, wordMatch in .words curLineCheck = wordMatch.match(curLine) if curLineCheck.success foundWords.add(word) if foundWords.count print "[curFile.fullName] may have [foundWords]" openFile.close