Regex sample
Posted: Wed Mar 25, 2009 6:26 am
Well, I've had occasion to make a small app using Regex in the last week or so. Here's what I have at the moment. It is more like a sample than a how-to in its current form. Advice and comments welcome.
- Caligari
"""
FindWords
This tool can be used to find which files in a directory tree contain any of a list of words.
The list of words to search for is provided by default in a file called "wordlist.txt" (located
in the same directory as the executable) with one regex per line.
A description of further options is available using the "-help" command-line argument.
"""
use System.Text.RegularExpressions
class FindWords
    """
    Command-line tool that reports which directory names, file names and file
    contents beneath a directory match any regex listed in a word-list file.
    All state is shared because the program runs once from the shared `main`.
    """

    # Path of the file holding one regex per line.
    var wordsFilename as String is shared
    # Root directory of the search.
    var searchDirectory as String is shared
    # When true, only directory and file names are checked, not file contents.
    var onlyNames as bool is shared
    # When true, matches are reported per line; otherwise once per file.
    var showLines as bool is shared
    # Pattern text -> compiled, case-insensitive regex.
    var words as SortedDictionary<of String, Regex> is shared
    var startDir as DirectoryInfo is shared
    var listFile as FileInfo is shared

    def showSyntax is shared
        """
        Print the command-line usage summary.
        """
        # Path handles both the directory part and the extension, whatever the
        # platform's separator character is.
        appName = Path.getFileNameWithoutExtension(CobraCore.commandLineArgs[0])
        print "Syntax:\n[appName] \[searchDirectory\] \[-l listFilename\] \[-n|-names] \[-s|-summary] \[-h|-help]"
        print " -n|names only check file and directory names"
        print " -s|summary only show files, not lines within files"
        print " -h|help show this help text"

    def main is shared
        """
        Parse the command line, load the regex list and scan the directory tree.
        Prints the usage text and returns early on -h/-help or on any argument
        error; returns early if the word-list file cannot be read.
        """
        .wordsFilename = Path.getDirectoryName(CobraCore.exePath) + Path.directorySeparatorChar.toString + "wordlist.txt"
        .searchDirectory = "."
        .onlyNames = false
        .showLines = true
        .words = SortedDictionary<of String, Regex>()

        args = CobraCore.commandLineArgs
        lookingForList = false
        foundDir = false
        argError = false
        if args.count > 1
            for arg in args[1:]
                if arg == "-h" or arg == "-help"
                    .showSyntax
                    return
                if lookingForList and arg.startsWith("-")
                    # '-l' was followed by another option instead of a filename
                    print "unable to determine filename for list of words, expected '-l filename'"
                    lookingForList = false
                    argError = true
                if arg == "-n" or arg == "-names" or arg == "-name"
                    .onlyNames = true
                else if arg == "-s" or arg == "-summary"
                    .showLines = false
                else if arg == "-l"
                    lookingForList = true
                else if arg.length == 0 or arg.startsWith("-")
                    print "unrecognized argument [arg]"
                    argError = true
                else if lookingForList
                    .wordsFilename = arg
                    lookingForList = false
                else if not foundDir
                    # first bare argument is the directory to look in
                    .searchDirectory = arg
                    foundDir = true
                else
                    print "more than one directory provided on commandline, expected 'directory'"
                    argError = true
            if lookingForList
                # '-l' was the last argument, so no filename ever arrived
                print "unable to determine filename for list of words, expected '-l filename'"
                argError = true
        if argError
            print
            .showSyntax
            return

        try
            # 'using' disposes the reader even if a read fails part-way
            using wordsFile = StreamReader(.wordsFilename)
                wordLine = wordsFile.readLine
                while wordLine
                    # '.+' succeeds only on non-empty lines and then yields the whole line
                    wordMatch = Regex.match(wordLine, r".+")
                    if wordMatch.success
                        pattern = wordMatch.toString
                        try
                            .words[pattern] = Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase)
                        catch ae as ArgumentException
                            # an invalid pattern should not abort the whole run
                            print 'Skipping invalid regex "[pattern]": [ae.message]'
                    wordLine = wordsFile.readLine
        catch ioe as IOException
            print 'I/O Error with [.wordsFilename]: [ioe.message]'
            return

        print "\nFound [.words.count] search words.\n"
        .listFile = FileInfo(.wordsFilename)
        .startDir = DirectoryInfo(.searchDirectory)
        .checkDirectory(.startDir)
        print "\nDone."

    def _matchingWords(text as String?) as List<of String> is shared
        """
        Return, in dictionary (sorted) order, every search word whose regex
        matches `text`. A nil `text` yields an empty list.
        """
        found = List<of String>()
        if text is nil, return found
        for word, wordRegex in .words
            if wordRegex.isMatch(text to !)
                found.add(word)
        return found

    def checkDirectory(curDir as DirectoryInfo) is shared
        """
        Report `curDir` if its name matches a search word, check each file in
        it, then recurse into its subdirectories. The word-list file itself is
        skipped wherever it appears in the tree. A directory that cannot be
        listed is reported and skipped.
        """
        if not curDir.exists
            print "Unable to find search directory [curDir.fullName]"
            return
        foundWords = ._matchingWords(curDir.name)
        if foundWords.count
            print "[curDir.fullName]: directory name may have [foundWords]"
        try
            subFiles = curDir.getFiles
            subDirs = curDir.getDirectories
        catch uae as UnauthorizedAccessException
            print " Access Error with [curDir.fullName]: [uae.message]"
        catch ioe as IOException
            print " I/O Error with [curDir.fullName]: [ioe.message]"
        success
            for subFile in subFiles
                # the word list would trivially match its own patterns
                if subFile.fullName == .listFile.fullName
                    continue
                .checkFile(subFile, curDir)
            for subDir in subDirs
                .checkDirectory(subDir)

    def checkFile(curFile as FileInfo, curDir as DirectoryInfo) is shared
        """
        Report `curFile` if its name matches a search word and, unless
        `onlyNames` is set, if its contents do: per line when `showLines` is
        true, otherwise once for the whole file. `curDir` is accepted for
        interface compatibility and is not used.
        """
        if not curFile.exists
            print "Unable to find file [curFile.fullName]"
            return
        foundWords = ._matchingWords(curFile.name)
        if foundWords.count
            print "[curFile.fullName]: file name may have [foundWords]"
        if .onlyNames
            return
        try
            # 'using' disposes the reader on every exit path
            using openFile = StreamReader(curFile.fullName)
                if .showLines
                    lineNum = 1
                    curLine = openFile.readLine
                    while curLine
                        foundWords = ._matchingWords(curLine)
                        if foundWords.count
                            print "[curFile.fullName]([lineNum]) may have [foundWords]"
                        curLine = openFile.readLine
                        lineNum += 1
                else
                    foundWords = ._matchingWords(openFile.readToEnd)
                    if foundWords.count
                        print "[curFile.fullName] may have [foundWords]"
        catch ioe as IOException
            print " I/O Error with [curFile.fullName]: [ioe.message]"
        catch uae as UnauthorizedAccessException
            print " Access Error with [curFile.fullName]: [uae.message]"
- Caligari