| 1 | """ |
|---|
| 2 | FindWords |
|---|
| 3 | by Caligari |
|---|
| 4 | with mods by Chuck |
|---|
| 5 | |
|---|
| 6 | This tool can be used to find which files in a directory tree contain any |
|---|
| 7 | of a list of words. |
|---|
| 8 | |
|---|
| 9 | The list of words to search for is provided by default in a file called |
|---|
| 10 | "wordlist.txt" (located in the same directory as the executable) with one |
|---|
| 11 | regular expression per line. |
|---|
| 12 | |
|---|
| 13 | A list of filename extensions to ignore can also be provided, if needed. |
|---|
| 14 | That list has one extension per line (not including the "."), and is |
|---|
| 15 | not a regular expression. |
|---|
| 16 | |
|---|
| 17 | A description of further options is available using the "-help" command-line argument. |
|---|
| 18 | |
|---|
| 19 | Comments in the code explain the use of regular expressions. |
|---|
| 20 | """ |
|---|
| 21 | |
|---|
| 22 | use System.Text.RegularExpressions |
|---|
| 23 | |
|---|
| 24 | |
|---|
class FindWords
    """
    Command-line tool that walks a directory tree and reports every directory
    name, file name and (optionally) file line that matches any regular
    expression read from a word-list file.

    Flow (see `main`): set defaults, parse the command line, load the word
    list, load the optional list of extensions to ignore, then recurse through
    the search directory.
    """

    # Path of the word-list file (one regular expression per line).
    # `main` defaults it to "wordlist.txt" next to the executable; -l overrides it.
    var wordsFilename = ''
    # Path of the optional ignore list (one extension per line); '' means none.
    var ignoreFilename = ''
    # Root of the tree to search; `main` defaults it to the current directory.
    var searchDirectory = ''
    # When true, only directory and file names are checked; contents are not read.
    var onlyNames as bool
    # When true, matches are reported per line; otherwise once per file.
    var showLines as bool
    # Pattern text -> Regex built from it (filled by `readRegExList`).
    var words = SortedDictionary<of String, Regex>()
    # Extensions to skip, each stored with its leading "." (filled by `readExtToIgnore`).
    var ignoreExtensions = List<of String>()
    # Set by `main` from .searchDirectory just before the search starts.
    var startDir as DirectoryInfo?
    # Set by `main` from .wordsFilename; `checkDirectory` uses it to skip the word list itself.
    var listFile as FileInfo?


    def main
        """
        Program entry point: establish defaults, apply command-line overrides,
        load the word and ignore lists, then search the directory tree.
        """
        # Defaults; `processCommandLine` may override any of them.
        .wordsFilename = Path.combine(Path.getDirectoryName(CobraCore.exePath), "wordlist.txt")
        .ignoreFilename = ""
        .searchDirectory = Path.getFullPath(".")
        .onlyNames = false
        .showLines = true

        .processCommandLine
        .readRegExList
        .readExtToIgnore

        .listFile = FileInfo(.wordsFilename)
        .startDir = DirectoryInfo(.searchDirectory)

        .checkDirectory(.startDir to !)

        print "\nDone."


    def showSyntax
        """
        Print the usage text and terminate the process with exit code 0.
        Used for an explicit -h / -help request.
        """
        .printSyntax
        CobraCore.exit(0)


    def printSyntax is private
        """
        Print the usage text without terminating, so that callers can choose
        the exit code (0 for -help, non-zero for a malformed command line).
        """
        # The first command-line element is the program itself; show only its
        # base name. The library call replaces manual separator and dot
        # slicing, which skipped a separator found at index 0.
        appName = Path.getFileNameWithoutExtension(CobraCore.commandLineArgs[0])
        print "Syntax:\n[appName] \[searchDirectory\] \[-l|list listFilename\] \[-i|ignore ignorelistFilename\] \[-n|-names] \[-s|-summary] \[-h|-help]"
        print " -l|list file filename of the list of words to search for"
        print " -i|ignore file filename of an optional list of extensions for files to ignore"
        print " -n|names only check file and directory names"
        print " -s|summary only show files, not lines within files"
        print " -h|help show this help text"


    def processCommandLine
        """
        Parse the command-line arguments into the option fields.

        Recognised arguments:
            -l / -list FILE      sets .wordsFilename
            -i / -ignore FILE    sets .ignoreFilename
            -n / -name / -names  sets .onlyNames to true
            -s / -summary        sets .showLines to false
            -h / -help           prints the usage text and exits with code 0
            any other argument not starting with "-" is the search directory
            (at most one is accepted)

        On any argument error the problem is printed, followed by the usage
        text, and the process exits with code 1.
        """
        args = CobraCore.commandLineArgs

        listArgs = ["-l", "-list"]
        ignoreArgs = ["-i", "-ignore"]
        nameArgs = ["-n", "-name", "-names"]
        summaryArgs = ["-s", "-summary"]
        helpArgs = ["-h", "-help"]
        allArgs = listArgs.concated(ignoreArgs).concated(nameArgs).concated(summaryArgs).concated(helpArgs)

        # State carried from one argument to the next: -l and -i each expect
        # the following argument to be a filename.
        lookingForIgnore = false
        lookingForList = false
        foundDir = false
        argError = false

        if args.count > 1
            for arg in args[1:]
                # An option appearing where a filename was expected means the
                # filename is missing.
                if arg in allArgs and lookingForList
                    print "unable to determine filename for list of words, expected '-l filename'"
                    lookingForList = false
                    argError = true
                if arg in allArgs and lookingForIgnore
                    print "unable to determine filename for list of ignore extensions, expected '-i filename'"
                    lookingForIgnore = false
                    argError = true

                if arg in helpArgs
                    .showSyntax
                else if arg in nameArgs
                    .onlyNames = true
                else if arg in summaryArgs
                    .showLines = false
                else if arg in ignoreArgs
                    lookingForIgnore = true
                else if arg in listArgs
                    lookingForList = true
                else if arg.startsWith("-")
                    # startsWith is used instead of indexing the first
                    # character so that an empty argument cannot fail here.
                    print "unrecognized argument [arg]"
                    argError = true
                else
                    if lookingForIgnore
                        .ignoreFilename = arg.toString
                        lookingForIgnore = false
                    else if lookingForList
                        .wordsFilename = arg.toString
                        lookingForList = false
                    else if not foundDir  # must be the directory to look in
                        .searchDirectory = arg.toString
                        foundDir = true
                    else  # already found directory
                        print "more than one directory provided on commandline, expected 'directory'"
                        argError = true

        # A -l or -i given as the very last argument has no filename after it.
        if lookingForList
            print "unable to determine filename for list of words, expected '-l filename'"
            argError = true
        if lookingForIgnore
            print "unable to determine filename for list of ignore extensions, expected '-i filename'"
            argError = true

        if argError
            print
            .printSyntax
            # Non-zero status so that scripts can detect the bad invocation.
            CobraCore.exit(1)


    def readRegExList
        """
        Load the word list named by .wordsFilename into .words, one regular
        expression per non-empty line, and print how many were loaded.

        An I/O failure is reported through `reportError` and the method
        returns with whatever was loaded so far. A line that is not a valid
        regular expression is reported and skipped.
        """
        try
            using wordsFile = StreamReader(.wordsFilename)
                wordLine = wordsFile.readLine
                while wordLine
                    # We can tell if there is a word on a line using an
                    # inline regular expression:
                    #     Regex.match(textToSearch, regularExpression)
                    # The textToSearch can be a hard-coded string, but most
                    # often it will be a String variable.
                    # The regularExpression is a string (hard-coded or not)
                    # that follows the regular expression syntax.
                    wordMatch = Regex.match(wordLine, r".+")

                    # The return from a match() call is a Match object. Its
                    # success property tells us whether the search located
                    # what we were looking for, and its toString method
                    # gives us the matched substring.
                    if wordMatch.success
                        pattern = wordMatch.toString
                        # We create a Regex object which can later be used to
                        # execute a search with the given regular expression.
                        # Two options are set: ignore case in the search, and
                        # pre-compile it, as we may be using it many times.
                        # The Regex is saved in a dictionary so that we pay
                        # the creation cost once rather than on every search.
                        try
                            .words[pattern] = Regex(pattern, RegexOptions(Compiled, IgnoreCase))
                        catch ae as ArgumentException
                            # The Regex constructor rejects malformed
                            # patterns; skip the line and keep the rest.
                            print 'Skipping invalid regular expression "[pattern]": [ae.message]'

                    wordLine = wordsFile.readLine
        catch ioe as IOException
            .reportError(.wordsFilename, ioe)
            return
        print "\nFound [.words.count] search words.\n"


    def readExtToIgnore
        """
        If an ignore list was given (.ignoreFilename is not empty), load it
        into .ignoreExtensions, prefixing each entry with "." so that it can
        be compared with FileInfo.extension, and print how many were loaded.

        An I/O failure is reported through `reportError`.
        """
        # optionally find extensions to ignore
        if .ignoreFilename.length > 0
            try
                using ignoreFile = StreamReader(.ignoreFilename)
                    ignoreLine = ignoreFile.readLine
                    while ignoreLine
                        # Trim before testing for emptiness so that a line
                        # holding only whitespace does not register "." as
                        # an extension.
                        ext = ignoreLine.trim
                        if ext.length > 0
                            .ignoreExtensions.add("." + ext)
                        ignoreLine = ignoreFile.readLine
            catch ioe as IOException
                .reportError(.ignoreFilename, ioe)
                return
            print "\nFound [.ignoreExtensions.count] ignored extensions.\n"


    def checkDirectory(curDir as DirectoryInfo)
        """
        Report whether the name of `curDir` matches any search word, check
        each file in it through `checkFile`, then recurse into each
        subdirectory.

        The word-list file itself and files whose extension is listed in
        .ignoreExtensions are skipped. A directory that cannot be listed is
        reported through `reportError` and skipped.
        """
        if not curDir.exists
            print "Unable to find search directory [curDir.fullName]"
            return

        # check directory name
        foundWords = .matchingWords(curDir.name)
        if foundWords.count
            print "[curDir.fullName]: directory name may have [foundWords]"

        try
            # check files
            for subFile in curDir.getFiles
                # Skip the word-list file itself and any file with an ignored
                # extension. Neither test uses regular expressions, although
                # the same scheme could be used for the ignore list if we
                # wanted to support regular expressions there as well.
                if subFile.fullName == .listFile.fullName or subFile.extension in .ignoreExtensions
                    continue
                .checkFile(subFile, curDir)

            for subDir in curDir.getDirectories
                .checkDirectory(subDir)
        catch uae as UnauthorizedAccessException
            # Listing a protected directory must not abort the whole search.
            .reportError(curDir.fullName, uae)
        catch ioe as IOException
            .reportError(curDir.fullName, ioe)


    def checkFile(curFile as FileInfo, curDir as DirectoryInfo)
        """
        Report whether the name of `curFile` matches any search word and,
        unless .onlyNames is set, whether its contents do.

        With .showLines set, every matching line is reported with its line
        number; otherwise the whole file is read and reported at most once.
        `curDir` is accepted for the caller's convenience and is not used.

        Read failures are reported through `reportError`.
        """
        if not curFile.exists
            print "Unable to find file [curFile.fullName]"
            return

        # check filename
        foundWords = .matchingWords(curFile.name)
        if foundWords.count
            print "[curFile.fullName]: file name may have [foundWords]"

        if .onlyNames
            return

        # check contents
        try
            # `using` closes the reader on every exit path, including the
            # error paths below.
            using openFile = StreamReader(curFile.fullName)
                if .showLines
                    # The check is made line by line so that the findings
                    # can be reported with their line numbers.
                    lineNum = 1
                    curLine = openFile.readLine
                    while curLine
                        foundWords = .matchingWords(curLine)
                        if foundWords.count
                            print "[curFile.fullName]([lineNum]) has [foundWords]"
                        curLine = openFile.readLine
                        lineNum += 1
                else
                    # When we don't need to report each line in a file we
                    # read the entire contents into one string and run the
                    # same regular expression match on it.
                    foundWords = .matchingWords(openFile.readToEnd)
                    if foundWords.count
                        print "[curFile.fullName] has [foundWords]"
        catch ioe as IOException
            .reportError(curFile.fullName, ioe)
        catch uae as UnauthorizedAccessException
            # A protected file must not abort the whole search.
            .reportError(curFile.fullName, uae)


    def matchingWords(text as String?) as List<of String> is private
        """
        Return the keys of .words (in the dictionary's iteration order) whose
        Regex matches `text`. Returns an empty list when nothing matches or
        when `text` is nil.
        """
        found = List<of String>()
        if text
            for word, wordMatch in .words
                # Each text is passed to the match method of every Regex
                # object in the dictionary built by `readRegExList`.
                if wordMatch.match(text).success
                    found.add(word)
        return found


    def reportError(fileName, exc as Exception)
        """
        Print a three-line error report: the name of the file or directory
        being read, the exception message, and a hint to run with -h.
        """
        print 'Error reading "[fileName]":'
        print exc.message
        print 'Run with -h for options.'