class Cobra2HTML
	"""
	Parses a string containing Cobra code and reformats it as a syntax
	highlighted HTML string.

	Assumes the existence of a css that defines at least the following
	classes:

		.c1   /* Single line comment */
		.cm   /* Doc string */
		.mf   /* Literal float */
		.m    /* Literal decimal */
		.mi   /* Literal integer */
		.sc   /* Literal char */
		.err  /* Error */
		.s    /* Literal string */
		.k    /* Keyword */
		.si   /* String substitution value */
		.o    /* Operator */
		.ni   /* Normal text */

	It only creates the body section of a page, so you are going to have
	to provide the html and head sections yourself. Example:

		class Program

			def main is shared
				source = File.readAllText('Cobra2HTML.cobra')
				html = '<html><head><link rel="stylesheet" href="cobra-highlight-styles.css" type="text/css" /></head><body><pre>\n'
				html += Cobra2HTML(source).rendered
				html += '</pre></body></html>\n'
				File.writeAllText('test.html', html)

	would produce a highlighted HTML page of what you're looking at now,
	provided you had a cobra-highlight-styles.css similar to the one
	generated in the Supplements folder of the Cobra distribution
	(though, you may want to play with the color scheme a bit).

	Cobra2HTML depends upon the Cobra tokenizer, so to build the example
	given above, you would need to include the following list of files:

		Cobra2HTML
		Utils
		Tokenizer
		CobraTokenizer
	"""

	var _rendered as String
	var _verbosity as int
	var _isRendered as bool

	get rendered as String
		"""
		The syntax highlighted HTML. Rendering is performed lazily on
		first access. The trailing '\n' appended by init is stripped.
		"""
		if not _isRendered
			.render
		return _rendered[:-1]

	# constructors

	def init(source as String)
		.init(source, 0, true)

	def init(source as String, verbosity as int, spacesOnly as bool)
		# TODO: Figure out why it blows up without an EOL at the end.
		_rendered = source + '\n'
		_verbosity = verbosity
		# At least on my machine, tabs are being rendered as 8 spaces,
		# so to facilitate cutting and pasting from the HTML, convert
		# the tabs to 4 spaces.
		if spacesOnly
			_rendered = _rendered.replace('\t', '    ')
		# TODO: Figure out why charNum is incorrect with files that
		#       use \r\n line endings. This will mess up raw strings
		#       that contain \r\n, but those should be far less common
		#       than files that are edited in Windows. The \r\n needs
		#       to be replaced in the rendered string anyway, but it
		#       would be better to do it by manipulating the EOL tokens.
		if '\r\n' in _rendered
			_rendered = _rendered.replace('\r\n', '\n')
		_isRendered = false

	# protected methods

	def render is protected
		"""
		Walks backwards through the token list inserting <span> tags
		where needed. Working from the end of the source backwards means
		each insertion cannot invalidate the charNum offsets of the
		tokens that have not been processed yet.
		"""
		for token in .reverse
			branch token.which
				# Pure layout tokens: nothing to highlight.
				on 'BLANK_TABS_LINE_1', 'BLANK_TABS_LINE_2', 'INDENT_MIXED_TSNS', _
					'INDENT_MIXED_TS', 'INDENT_MIXED_ST', 'INDENT_ALL_TABS', _
					'INDENT_ALL_SPACES', 'NO_INDENT', 'EOL', 'SPACE'
					pass
				# Tokens that need HTML escaping but no span of their own.
				on 'OPEN_GENERIC', 'CLOSE_GENERIC', 'DOC_STRING_BODY_TEXT'
					sub = .escape(token.text)
					.replaceToken(token.charNum, token.text, sub)
				on 'OPEN_IF'
					# 'if(' -- highlight the 'if' as a keyword, not the paren.
					.closeSpan(token.charNum + token.text.length - 1)
					.openSpan(token.charNum, 'k')
				on 'SINGLE_LINE_COMMENT'
					.span(token, 'c1')
				on 'OPEN_CALL', 'ID', 'DOT', 'COLON', 'COMMA', 'LPAREN', _
					'RPAREN', 'LBRACKET', 'RBRACKET', 'LCURLY', 'RCURLY'
					.span(token, 'ni')
				on 'FLOAT_LIT'
					.span(token, 'mf')
				on 'DECIMAL_LIT'
					.span(token, 'm')
				on 'INTEGER_LIT'
					.span(token, 'mi')
				on 'INT_SIZE', 'UINT_SIZE', 'FLOAT_SIZE'
					pass
				on 'CHAR_LIT_SINGLE', 'CHAR_LIT_DOUBLE'
					.span(token, 'sc')
				on 'DOC_STRING_START'
					.openSpan(token.charNum, 'cm')
				on 'DOC_STRING_STOP'
					.closeSpan(token.charNum + token.text.length)
				on 'SHARP_SINGLE', 'SHARP_DOUBLE', 'SYNTAX_ERROR'
					.span(token, 'err')
				on 'STRING_RAW_SINGLE', 'STRING_RAW_DOUBLE', 'STRING_NOSUB_SINGLE', _
					'STRING_NOSUB_DOUBLE', 'STRING_SINGLE', 'STRING_DOUBLE'
					.span(token, 's')
				# Interior pieces of substituted strings and misc structural
				# tokens: leave them alone.
				on 'RBRACKET_SPECIAL', 'STRING_PART_SINGLE', 'STRING_PART_DOUBLE', _
					'STRING_PART_FORMAT', 'SHARP_OPEN', 'SINGLE_QUOTE', _
					'DOUBLE_QUOTE', 'SEMI', 'DICT_OPEN', 'DICT_CLOSE', _
					'ARRAY_OPEN', 'CONTINUATION'
					pass
				on 'STRING_START_SINGLE', 'STRING_START_DOUBLE'
					sub = .escape(token.text)
					.replaceToken(token.charNum, token.text, sub)
					.openSpan(token.charNum, 's')
				on 'STRING_STOP_SINGLE', 'STRING_STOP_DOUBLE'
					sub = .escape(token.text)
					.replaceToken(token.charNum, token.text, sub)
					.closeSpan(token.charNum + sub.length)
				on 'TOQ', 'QUESTION', 'BANG'
					.span(token, 'k')
				on 'DOTDOT', 'PLUS', 'PLUSPLUS', 'MINUSMINUS', 'MINUS', _
					'STARSTAR', 'STAR', 'SLASHSLASH', 'SLASH', 'PERCENTPERCENT', _
					'PERCENT', 'AMPERSAND', 'VERTICAL_BAR', 'CARET', 'DOUBLE_LT', _
					'DOUBLE_GT', 'ASSIGN', 'EQ', 'NE', 'LT', 'GT', 'LE', 'GE', _
					'PLUS_EQUALS', 'MINUS_EQUALS', 'STAR_EQUALS', 'STARSTAR_EQUALS', _
					'SLASH_EQUALS', 'SLASHSLASH_EQUALS', 'PERCENT_EQUALS', _
					'QUESTION_EQUALS', 'BANG_EQUALS'
					.span(token, 'o')
				else
					if token.isKeyword
						.span(token, 'k')
		# Highlight the comments.
		.spanComments
		# Now that we've located the comments, convert the
		# escaped #'s back to what they're supposed to be.
		# The following line won't be rendered properly.
		_rendered = _rendered.replace('~~~akgjjtldf~~~', '#')
		_isRendered = true

	def spanComments is protected
		"""
		We escaped the #'s that were found in string literals and doc
		strings as we were processing the tokens, so any remaining #'s
		must be single line comments. Wraps each one, through to the end
		of its line, in a <span class="c1"> tag.
		"""
		front = ''
		back = _rendered
		while '#' in back
			front += back[:back.indexOf('#')]
			back = back[back.indexOf('#'):]
			front += '<span class="c1">'
			if '\n' in back
				front += back[:back.indexOf('\n')]
				back = back[back.indexOf('\n'):]
			else
				front += back
				back = ''
			front += '</span>'
		front += back
		_rendered = front

	def span(token as IToken, highlightClass as String) is protected
		"""
		Brackets the token in a <span> tag.
		Close before open: both insertions use the original charNum
		offsets, so the closing tag must go in first.
		"""
		sub = .escape(token.text)
		.replaceToken(token.charNum, token.text, sub)
		.closeSpan(token.charNum + sub.length)
		.openSpan(token.charNum, highlightClass)

	def openSpan(location as int, highlightClass as String) is protected
		"""
		Inserts a <span class="..."> tag at the given 1-based location.
		"""
		require
			location > 0
			location <= _rendered.length
		body
			_rendered = _rendered[:location - 1] + '<span class="[highlightClass]">' + _rendered[location - 1:]

	def closeSpan(location as int) is protected
		"""
		Inserts a </span> tag at the given 1-based location.
		"""
		require
			location > 0
			location <= _rendered.length
		body
			_rendered = _rendered[:location - 1] + '</span>' + _rendered[location - 1:]

	def replaceToken(location as int, f as String, t as String) is protected
		"""
		Replaces the string in f with the string in t at the specified
		1-based location.
		"""
		require
			location > 0
			location <= _rendered.length
		body
			_rendered = _rendered[:location - 1] + t + _rendered[location - 1 + f.length:]

	def escape(text as String) as String is protected
		"""
		Escapes characters that cause the HTML rendering to sometimes be
		unhappy.
		"""
		ret = text.replace('&', '&amp;')
		ret = ret.replace('"', '&quot;')
		ret = ret.replace('<', '&lt;')
		ret = ret.replace('>', '&gt;')
		ret = ret.replace("'", '&#39;')
		# TODO: Figure out how to escape #, i.e. the following line won't be
		#       rendered properly. Alternatively, figure out another way to
		#       identify comments.
		ret = ret.replace('#', '~~~akgjjtldf~~~')
		return ret

	def tokens as IEnumerable is protected
		"""
		Tokenizes the source. On a tokenizer error, yields a single
		SYNTAX_ERROR token carrying the message and the whole source.
		Otherwise rewrites '_' line continuations and generic-close '>'
		tokens so render can treat them specially.
		"""
		tokenizer = CobraTokenizer(_verbosity)
		try
			tokens = tokenizer.startSource(_rendered).allCachedTokens
			error = false
		catch te as TokenizerError
			error = true
			errorMessage = te.message
		if error
			yield Token('(no file)', 0, 0, 0, 'SYNTAX_ERROR', errorMessage + '\n' + _rendered, nil)
		else
			genericOpens = 0
			for i = 0 .. tokens.count
				if tokens[i].text == '_' and tokens[i].which == 'ID'
					if i < tokens.count - 1 and tokens[i + 1].which == 'EOL'
						yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'CONTINUATION', '_', nil)
					else
						yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'SYNTAX_ERROR', '_', nil)
				else if tokens[i].which == 'OPEN_GENERIC'
					genericOpens += 1
					yield tokens[i]
				else if tokens[i].which == 'GT' and genericOpens > 0
					genericOpens -= 1
					yield Token(tokens[i].fileName, tokens[i].lineNum, tokens[i].colNum, tokens[i].charNum, 'CLOSE_GENERIC', '>', nil)
				else
					yield tokens[i]

	def reverse as IEnumerable is protected
		"""
		Generates a backwards listing of the tokens in the source.
		Looks a little silly, but I didn't know if I would have to do
		token substitutions on the reversed list as well.
		"""
		reversed = List<of IToken>(.tokens)
		reversed.reverse
		for token in reversed
			yield token