class Cobra2HTML
    """
    Parses a string containing Cobra code and reformats it as a syntax highlighted HTML string.
    Assumes the existence of a css that defines at least the following classes:

        .c1   /* Single line comment */
        .cm   /* Doc string */
        .mf   /* Literal float */
        .m    /* Literal decimal */
        .mi   /* Literal integer */
        .sc   /* Literal char */
        .err  /* Error */
        .s    /* Literal string */
        .k    /* Keyword */
        .si   /* String substitution value */
        .o    /* Operator */
        .ni   /* Normal text */

    It only creates the body section of a page, so you are going to have to provide the html and
    head sections yourself. Example:

        class Program
            def main is shared
                source = File.readAllText('Cobra2HTML.cobra')
                html = '<html><head>\n'
                html += '<link rel="stylesheet" href="cobra-highlight-styles.css" type="text/css" />\n</head><body><pre>\n'
                html += Cobra2HTML(source).rendered
                html += '</pre></body></html>\n'
                File.writeAllText('test.html', html)

    would produce a highlighted HTML page of what you're looking at now, provided you had a
    cobra-highlight-styles.css similar to the one generated in the Supplements folder of the
    Cobra distribution (though, you may want to play with the color scheme a bit).

    Cobra2HTML depends upon the Cobra tokenizer, so to build the example given above, you would need
    to include the following list of files:

        Cobra2HTML
        Utils
        Tokenizer
        CobraTokenizer
    """

    var _rendered as String

    get rendered as String
        # Render lazily on first access. The trailing '\n' appended in
        # init is stripped before the result is handed back.
        if not _isRendered
            .render
        return _rendered[:-1]

    var _verbosity as int

    var _isRendered as bool

    # constructors

    def init(source as String)
        .init(source, 0, true)

    def init(source as String, verbosity as int, spacesOnly as bool)
        # TODO: Figure out why it blows up without an EOL at the end.
        _rendered = source + '\n'
        _verbosity = verbosity
        # At least on my machine, tabs are being rendered as 8 spaces,
        # so to facilitate cutting and pasting from the HTML, convert
        # the tabs to 4 spaces.
        if spacesOnly
            _rendered = _rendered.replace('\t', '    ')
        # TODO: Figure out why charNum is incorrect with files that
        #       use \r\n line endings. This will mess up raw strings
        #       that contain \r\n, but those should be far less common
        #       than files that are edited in Windows. The \r\n needs
        #       to be replaced in the rendered string anyway, but it
        #       would be better to do it by manipulating the EOL tokens.
        if '\r\n' in _rendered
            _rendered = _rendered.replace('\r\n', '\n')
        _isRendered = false

    # protected methods

    def render is protected
        """
        Walks backwards through the token list inserting tags where
        needed. Walking backwards means each insertion happens at a
        character offset that the (smaller) charNums of the remaining
        tokens are not shifted by.
        """
        for token in .reverse
            branch token.which
                on 'BLANK_TABS_LINE_1'
                    pass
                on 'BLANK_TABS_LINE_2'
                    pass
                on 'INDENT_MIXED_TSNS'
                    pass
                on 'INDENT_MIXED_TS'
                    pass
                on 'INDENT_MIXED_ST'
                    pass
                on 'INDENT_ALL_TABS'
                    pass
                on 'INDENT_ALL_SPACES'
                    pass
                on 'NO_INDENT'
                    pass
                on 'EOL'
                    pass
                on 'SINGLE_LINE_COMMENT'
                    .span(token, 'c1')
                on 'SPACE'
                    pass
                on 'OPEN_GENERIC'
                    # Contains a '<' that must be escaped so the browser
                    # doesn't take it as the start of a tag.
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                on 'CLOSE_GENERIC'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                on 'OPEN_IF'
                    # The token text is 'if(' -- highlight only the 'if'
                    # as a keyword, hence the -1 to leave the '(' outside
                    # the span.
                    .closeSpan(token.charNum + token.text.length - 1)
                    .openSpan(token.charNum, 'k')
                on 'OPEN_CALL'
                    .span(token, 'ni')
                on 'FLOAT_LIT'
                    .span(token, 'mf')
                on 'DECIMAL_LIT'
                    .span(token, 'm')
                on 'INTEGER_LIT'
                    .span(token, 'mi')
                on 'INT_SIZE'
                    pass
                on 'UINT_SIZE'
                    pass
                on 'FLOAT_SIZE'
                    pass
                on 'CHAR_LIT_SINGLE'
                    .span(token, 'sc')
                on 'CHAR_LIT_DOUBLE'
                    .span(token, 'sc')
                on 'DOC_STRING_START'
                    .openSpan(token.charNum, 'cm')
                on 'DOC_STRING_BODY_TEXT'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                on 'DOC_STRING_STOP'
                    .closeSpan(token.charNum + token.text.length)
                on 'SHARP_SINGLE'
                    .span(token, 'err')
                on 'SHARP_DOUBLE'
                    .span(token, 'err')
                on 'STRING_RAW_SINGLE'
                    .span(token, 's')
                on 'STRING_RAW_DOUBLE'
                    .span(token, 's')
                on 'RBRACKET_SPECIAL'
                    pass
                on 'STRING_START_SINGLE'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                    .openSpan(token.charNum, 's')
                on 'STRING_PART_SINGLE'
                    pass
                on 'STRING_STOP_SINGLE'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                    .closeSpan(token.charNum + sub.length)
                on 'STRING_START_DOUBLE'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                    .openSpan(token.charNum, 's')
                on 'STRING_PART_DOUBLE'
                    pass
                on 'STRING_STOP_DOUBLE'
                    sub = .escape(token.text)
                    .replaceToken(token.charNum, token.text, sub)
                    .closeSpan(token.charNum + sub.length)
                on 'STRING_PART_FORMAT'
                    pass
                on 'STRING_NOSUB_SINGLE'
                    .span(token, 's')
                on 'STRING_NOSUB_DOUBLE'
                    .span(token, 's')
                on 'STRING_SINGLE'
                    .span(token, 's')
                on 'STRING_DOUBLE'
                    .span(token, 's')
                on 'TOQ'
                    .span(token, 'k')
                on 'ID'
                    .span(token, 'ni')
                on 'SHARP_OPEN'
                    pass
                on 'SINGLE_QUOTE'
                    pass
                on 'DOUBLE_QUOTE'
                    pass
                on 'DOT'
                    .span(token, 'ni')
                on 'DOTDOT'
                    .span(token, 'o')
                on 'COLON'
                    .span(token, 'ni')
                on 'PLUS'
                    .span(token, 'o')
                on 'PLUSPLUS'
                    .span(token, 'o')
                on 'MINUSMINUS'
                    .span(token, 'o')
                on 'MINUS'
                    .span(token, 'o')
                on 'STARSTAR'
                    .span(token, 'o')
                on 'STAR'
                    .span(token, 'o')
                on 'SLASHSLASH'
                    .span(token, 'o')
                on 'SLASH'
                    .span(token, 'o')
                on 'PERCENTPERCENT'
                    .span(token, 'o')
                on 'PERCENT'
                    .span(token, 'o')
                on 'AMPERSAND'
                    .span(token, 'o')
                on 'VERTICAL_BAR'
                    .span(token, 'o')
                on 'CARET'
                    .span(token, 'o')
                on 'DOUBLE_LT'
                    .span(token, 'o')
                on 'DOUBLE_GT'
                    .span(token, 'o')
                on 'ASSIGN'
                    .span(token, 'o')
                on 'LPAREN'
                    .span(token, 'ni')
                on 'RPAREN'
                    .span(token, 'ni')
                on 'LBRACKET'
                    .span(token, 'ni')
                on 'RBRACKET'
                    .span(token, 'ni')
                on 'LCURLY'
                    .span(token, 'ni')
                on 'RCURLY'
                    .span(token, 'ni')
                on 'SEMI'
                    pass
                on 'COMMA'
                    .span(token, 'ni')
                on 'DICT_OPEN'
                    pass
                on 'DICT_CLOSE'
                    pass
                on 'QUESTION'
                    .span(token, 'k')
                on 'BANG'
                    .span(token, 'k')
                on 'ARRAY_OPEN'
                    pass
                on 'EQ'
                    .span(token, 'o')
                on 'NE'
                    .span(token, 'o')
                on 'LT'
                    .span(token, 'o')
                on 'GT'
                    .span(token, 'o')
                on 'LE'
                    .span(token, 'o')
                on 'GE'
                    .span(token, 'o')
                on 'PLUS_EQUALS'
                    .span(token, 'o')
                on 'MINUS_EQUALS'
                    .span(token, 'o')
                on 'STAR_EQUALS'
                    .span(token, 'o')
                on 'STARSTAR_EQUALS'
                    .span(token, 'o')
                on 'SLASH_EQUALS'
                    .span(token, 'o')
                on 'SLASHSLASH_EQUALS'
                    .span(token, 'o')
                on 'PERCENT_EQUALS'
                    .span(token, 'o')
                on 'QUESTION_EQUALS'
                    .span(token, 'o')
                on 'BANG_EQUALS'
                    .span(token, 'o')
                on 'CONTINUATION'
                    pass
                on 'SYNTAX_ERROR'
                    .span(token, 'err')
                else
                    if token.isKeyword
                        .span(token, 'k')
        # Highlight the comments.
        .spanComments
        # Now that we've located the comments, convert the
        # escaped #'s back to what they're supposed to be.
        # The following line won't be rendered properly.
        _rendered = _rendered.replace('~~~akgjjtldf~~~', '#')
        _isRendered = true

    def spanComments is protected
        """
        We escaped the #'s that were found in string literals and doc strings
        as we were processing the tokens, so any remaining #'s must be single
        line comments. Wraps each one, through end of line, in a
        <span class="c1"> ... </span> pair.
        """
        front = ''
        back = _rendered
        while '#' in back
            front += back[:back.indexOf('#')]
            back = back[back.indexOf('#'):]
            front += '<span class="c1">'
            if '\n' in back
                # The comment runs to the end of the line.
                front += back[:back.indexOf('\n')]
                back = back[back.indexOf('\n'):]
            else
                # Comment on the very last line with no trailing newline.
                front += back
                back = ''
            front += '</span>'
        front += back
        _rendered = front

    def span(token as IToken, highlightClass as String) is protected
        """
        Brackets the token in a <span class="..."> ... </span> tag pair,
        escaping the token text first. The close tag is inserted before
        the open tag so the open insertion point stays valid.
        """
        sub = .escape(token.text)
        .replaceToken(token.charNum, token.text, sub)
        .closeSpan(token.charNum + sub.length)
        .openSpan(token.charNum, highlightClass)

    def openSpan(location as int, highlightClass as String) is protected
        """
        Inserts a <span class="[highlightClass]"> tag at the given
        1-based character location.
        """
        require
            location > 0
            location <= _rendered.length
        body
            _rendered = _rendered[:location - 1] + '<span class="[highlightClass]">' + _rendered[location - 1:]

    def closeSpan(location as int) is protected
        """
        Inserts a </span> tag at the given 1-based character location.
        """
        require
            location > 0
            location <= _rendered.length
        body
            _rendered = _rendered[:location - 1] + '</span>' + _rendered[location - 1:]

    def replaceToken(location as int, f as String, t as String) is protected
        """
        Replaces the string in f with the string in t at the specifed
        1-based location.
        """
        require
            location > 0
            location <= _rendered.length
        body
            _rendered = _rendered[:location - 1] + t + _rendered[location - 1 + f.length:]

    def escape(text as String) as String is protected
        """
        Escapes characters that cause the HTML rendering to sometimes be unhappy.
        The ampersand must be escaped first so the other entities aren't
        double-escaped.
        """
        ret = text.replace('&', '&amp;')
        ret = ret.replace('"', '&quot;')
        ret = ret.replace('<', '&lt;')
        ret = ret.replace('>', '&gt;')
        ret = ret.replace("'", '&#39;')
        # TODO: Figure out how to escape #, i.e. the following line won't be
        #       rendered properly. Alternatively, figure out another way to
        #       identify comments.
        ret = ret.replace('#', '~~~akgjjtldf~~~')
        return ret

    def tokens as IEnumerable is protected
        """
        Tokenizes the source. On a tokenizer error, yields a single
        SYNTAX_ERROR token covering the whole source so the error is
        still visible in the output. Also rewrites '_' continuation
        tokens and rebalances '>' tokens that close generics.
        """
        tokenizer = CobraTokenizer(_verbosity)
        try
            tokens = tokenizer.startSource(_rendered).allCachedTokens
            error = false
        catch te as TokenizerError
            error = true
            errorMessage = te.message
        if error
            yield Token('(no file)', 0, 0, 0, 'SYNTAX_ERROR', errorMessage + '\n' + _rendered, nil)
        else
            genericOpens = 0
            for i = 0 .. tokens.count
                if tokens[i].text == '_' and tokens[i].which == 'ID'
                    # A bare '_' at end of line is a line continuation;
                    # anywhere else it is an error.
                    if i < tokens.count - 1 and tokens[i + 1].which == 'EOL'
                        yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'CONTINUATION', '_', nil)
                    else
                        yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'SYNTAX_ERROR', '_', nil)
                else if tokens[i].which == 'OPEN_GENERIC'
                    genericOpens += 1
                    yield tokens[i]
                else if tokens[i].which == 'GT' and genericOpens > 0
                    # This '>' closes a generic, so it needs escaping
                    # rather than operator highlighting.
                    genericOpens -= 1
                    yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'CLOSE_GENERIC', '>', nil)
                else
                    yield tokens[i]

    def reverse as IEnumerable is protected
        """
        Generates a backwards listing of the tokens in the source. Looks a little silly,
        but I didn't know if I would have to do token substitutions on the reversed list
        as well.
        """
        reversed = List(.tokens)
        reversed.reverse
        for token in reversed
            yield token