"""

Rules:
* If an numeric literal contains a decimal point or the exponential symbol (E),
  it is a number (decimal, float64, etc.).
* Else, the type is the first of int, uint, int64, uint64 that will fit the value.

"""

use System.Collections
use System.Text.RegularExpressions


class CobraTokenizer inherits Tokenizer

	test
		# There are plenty of external Cobra source code tests that will exercise the lexer.
		# But here are a few basic tests to make sure the tokenizer has some viability.
		t = CobraTokenizer()
		# TODO:
		t.startSource('foo bar')
		tokens = t.allTokens
		Tokenizer.checkTokens(tokens, 'ID ID EOL')

		t.restart
		t.startSource('class Foo\n\tdef foo()\n\t\treturn 1')
#		t.startSource(
#'''class Foo
#	def foo()
#		return 1
#''')
		tokens = t.allTokens
		Tokenizer.checkTokens(tokens, 'CLASS ID EOL INDENT DEF OPEN_CALL RPAREN EOL INDENT RETURN INTEGER_LIT EOL DEDENT DEDENT')

		t.restart
		t.startSource('class Foo\n\tpass\n\nclass Bar\n\tpass')
#		t.startSource(
#	'''class Foo
#		pass
#
#	class Bar
#		pass
#	''')
		tokens = t.allTokens
		Tokenizer.checkTokens(tokens, 'CLASS ID EOL INDENT PASS EOL EOL DEDENT CLASS ID EOL INDENT PASS EOL DEDENT')

		t.restart
		t.startSource('class Foo\n\tdef foo()\n\t\treturn 1\n\nclass Bar\n\tpass\n')
#	t.startSource(
#'''class Foo
#	def foo()
#		return 1
#
#class Bar
#	pass
#''')
		tokens = t.allTokens
		Tokenizer.checkTokens(tokens, 'CLASS ID EOL INDENT DEF OPEN_CALL RPAREN EOL INDENT RETURN INTEGER_LIT EOL EOL DEDENT DEDENT CLASS ID EOL INDENT PASS EOL DEDENT')

	var _indentCount as int
	var _substLBracketCount as int
	var _inSubstStringSingle = false
	var _inSubstStringDouble = false
	var _inDocString = false
	var _inCommentBlock = 0

	cue init
		base.init

	cue init(verbosity as int)
		base.init
		_verbosity = verbosity

	def _reuse
		base._reuse
		_indentCount = 0
		_substLBracketCount = 0
		_inSubstStringSingle = false
		_inSubstStringDouble = false
		_inDocString = false
		_inCommentBlock = 0
		
	def addInfo(sb as StringBuilder) is override
		base.addInfo(sb)
		sb.append('_indentCount=[_indentCount], ')
		sb.append('_substLBracketCount=[_substLBracketCount], ')
		sb.append('_inSubstStringSingle=[_inSubstStringSingle], ')
		sb.append('_inSubstStringDouble=[_inSubstStringDouble], ')
		sb.append('_inDocString=[_inDocString]')
		sb.append('_inCommentBlock=[_inCommentBlock]')

	pro willReturnComments from var as bool

	pro willReturnDirectives from var as bool
		
	# Note: The Tokenizer class handles it's input one line at a time,
	#       and retains the \n at the end of the line. This affects
	#       the regex design for the tokens below.


	def orderedTokenSpecs as IList is override
		return [
			# whitespace
			WhiteSpaceLineTokenDef(r'^[\t ]+$'),
			WhiteSpaceCommentTokenDef(1, r'^[\t]+[ ]*\#.*$'),
			WhiteSpaceCommentTokenDef(2, r'^[ ]+[\t]*\#.*$'),
			SingleLineCommentBlockTokenDef(r'^[ \t]*\/\#.*\#\/[ \t]*$'),
			CommentBlockStartTokenDef(r'^[ \t]*\/\#.*$'),
			IndentMixedTokenDef(1, r'^[\t]+[ ]+(?=[^\t ])'),
			IndentMixedTokenDef(2, r'^[\t]+[ ]+'),
			r'INDENT_MIXED_ST		r	^[ ]+[\t]+',
			IndentAllTokenDef('INDENT_ALL_TABS', c'\t', r'^[\t]+'),
			IndentAllTokenDef('INDENT_ALL_SPACES', c' ', r'^[ ]+'),
			r'NO_INDENT				r	^(?=[^\t\n#\/])',
			CharTokenDef('EOL', c'\n'),
			r'INLINE_COMMENT		r	\/\#.*\#/',
			r'SINGLE_LINE_COMMENT	r	\#.*',
			r'AMBIGUOUS_COMMENT		r	\/\#.*',
			SpaceTokenDef('SPACE', r'[ \t]+'),

			r'AT_ID					r	@[A-Za-z_][A-Za-z0-9_]*',

			OpenComboTokenDef(),
			# OpenGenericTokenDef('OPEN_GENERIC', r'[A-Za-z_][A-Za-z0-9_]*<of([ \n\r\t]|(?=[,>]))'),
			# r'OPEN_DO				s	do(',
			# r'OPEN_IF				s	if(',
			# OpenCallTokenDef('OPEN_CALL', r'[A-Za-z_][A-Za-z0-9_]*\('),

			r'HEX_LIT_UNSIGN		r	0x[\dA-Fa-f][\dA-Fa-f]*(_?u)(8|16|32|64)?',
			r'HEX_LIT_EXPLICIT		r	0x[\dA-Fa-f][\dA-Fa-f]*_(8|16|32|64)?',
			r'HEX_LIT				r	0x[\dA-Fa-f][\dA-Fa-f]*',

			r'FLOAT_LIT_1			r	\d[\d_]*\.\d+_?f(32|64)?',
			r'FLOAT_LIT_2			r	\d[\d_]*(_?f)(32|64)?',
			r'DECIMAL_LIT			r	\d[\d_]*(\.\d+)?(_?d)',
			r'NUMBER_LIT			r	\d[\d_]*(\.\d+)?(_?n)',
			r'FRACTIONAL_LIT		r	\d[\d_]*\.\d+',
			r'INTEGER_LIT_EXPLICIT	r	\d[\d_]*_?[iu](8|16|32|64)?',
			r'INTEGER_LIT			r	\d[\d_]*',

			r'INT_SIZE				r	int[0-9]+(?=[^A-Za-z0-9_])',
			r'UINT_SIZE				r	uint[0-9]+(?=[^A-Za-z0-9_])',
			r'FLOAT_SIZE			r	float[0-9]+(?=[^A-Za-z0-9_])',

			r"CHAR_LIT_SINGLE		r	c'(?:\\'|\\?[^'])'",
			r'CHAR_LIT_DOUBLE		r	c"(?:\\"|\\?[^"])"',

			# doc strings
			r'DOC_STRING_START		r	"""[ \t]*\n',
			r'DOC_STRING_LINE		r	""".*"""[ \t]*\n',

			# sharp strings
			r"SHARP_SINGLE			r	sharp'(?:\\.?|[^'\n])*'",
			r'SHARP_DOUBLE			r	sharp"(?:\\.?|[^"\n])*"',

			# raw strings
			r"STRING_RAW_SINGLE		r	r'(?:\\.?|[^'\n])*'",
			r'STRING_RAW_DOUBLE		r	r"(?:\\.?|[^"\n])*"',

			# substituted strings
			r'RBRACKET_SPECIAL		s	]',
			StringStartTokenDef('STRING_START_SINGLE', c"'"),
			StringPartTokenDef('STRING_PART_SINGLE', c"'"),
			StringStopTokenDef('STRING_STOP_SINGLE', c"'"),

			StringStartTokenDef('STRING_START_DOUBLE', c'"'),
			StringPartTokenDef('STRING_PART_DOUBLE', c'"'),
			StringStopTokenDef('STRING_STOP_DOUBLE', c'"'),

			r'STRING_PART_FORMAT	r	:[^X"\n\[]*(?=])'.replace('X', "'"),

			# plain strings
			r"STRING_NOSUB_SINGLE	r	ns'(?:\\.?|[^'\n])*'",
			r'STRING_NOSUB_DOUBLE	r	ns"(?:\\.?|[^"\n])*"',

			StringLiteralTokenDef('STRING_SINGLE', c"'"),			
			StringLiteralTokenDef('STRING_DOUBLE', c'"'),

			r'TOQ					s	to?',
			IdTokenDef('ID', r'[A-Za-z_][A-Za-z0-9_]*'),
		]

	def unorderedTokenSpecs as IList is override
		return [
			r'SHARP_OPEN		s	$sharp(',  # deprecated. $ is reserved for future language level regex support
			r"SINGLE_QUOTE		s	'",
			r'DOUBLE_QUOTE		s	"',
			r'DOT				s	.',
			r'DOTDOT			s	..',
			r'COLON				s	:',
			r'PLUS				s	+',
			r'PLUSPLUS			s	++',
			r'MINUSMINUS		s	--',
			r'MINUS				s	-',
			r'STARSTAR			s	**',
			r'STAR				s	*',
			r'SLASHSLASH		s	//',
			r'SLASH				s	/',
			r'PERCENTPERCENT	s	%%',
			r'PERCENT			s	%',
			r'ASSIGN			s	=',
			r'LPAREN			s	(',
			r'RPAREN			s	)',
			r'LBRACKET			s	[',
			r'RBRACKET			s	]',
			r'LCURLY			s	{',
			r'RCURLY			s	}',
			r'SEMI				s	;',
			r'COMMA				s	,',
			r'DOUBLE_LT			s	<<',
			r'DOUBLE_GT			s	>>',
			r'QUESTION			s	?',
			r'BANG				s	!',
			r'ARRAY_OPEN		s	@[',
			r'AMPERSAND			s	&',
			r'VERTICAL_BAR		s	|',
			r'CARET				s	^',
			r'TILDE				s	~',

			r'EQ				s	==',
			r'NE				s	<>',
			r'LT				s	<',
			r'GT				s	>',
			r'LE				s	<=',
			r'GE				s	>=',

			r'PLUS_EQUALS		s	+=',
			r'MINUS_EQUALS		s	-=',
			r'STAR_EQUALS		s	*=',
			r'SLASH_EQUALS		s	/=',
			r'SLASHSLASH_EQUALS	s	//=',
			r'PERCENT_EQUALS	s	%=',

			r'STARSTAR_EQUALS	s	**=',

			r'AMPERSAND_EQUALS		s	&=',
			r'VERTICAL_BAR_EQUALS	s	|=',
			r'CARET_EQUALS			s	^=',
			r'DOUBLE_LT_EQUALS		s	<<=',
			r'DOUBLE_GT_EQUALS		s	>>=',

			r'QUESTION_EQUALS	s	?=',
			r'BANG_EQUALS		s	!=',
		]

	get keywords as IList<of String> is override
		return KeywordSpecs.keywords

	pro typeProvider from var as ITypeProvider?

	def _reset is override
		base._reset
		_indentCount = 0
		_substLBracketCount = 0

	def allTokens as List<of IToken>
		tokens = base.allTokens
		if _inCommentBlock, .throwError('Unfinished multi-line comment.')
		return tokens

	## popular single chars
	
	def makeASSIGN(definition as String) as TokenDef?
		return CharTokenDef('ASSIGN', c'=')
	
	def makeDOT(definition as String) as TokenDef?
		return CharTokenDef('DOT', c'.')
	
	def makeCOMMA(definition as String) as TokenDef?
		return CharTokenDef('COMMA', c',')
	
	def makeLPAREN(definition as String) as TokenDef?
		return CharTokenDef('LPAREN', c'(')
	
	def makeRPAREN(definition as String) as TokenDef?
		return CharTokenDef('RPAREN', c')')

	def makeLBRACKET(definition as String) as TokenDef?
		return CharTokenDef('LBRACKET', c'[')
	
	def makeRBRACKET(definition as String) as TokenDef?
		return CharTokenDef('RBRACKET', c']')
	
	## end popular single chars
	
	## popular strings
	
	def makeDOTDOT(definition as String) as TokenDef?
		return StringTokenDef('DOTDOT', '..')

	def makeOPEN_IF(definition as String) as TokenDef?
		return StringTokenDef('OPEN_IF', 'if(')

	def makeTOQ(definition as String) as TokenDef?
		return StringTokenDef('TOQ', 'to?')
	
	def makeOPEN_DO(definition as String) as TokenDef?
		return StringTokenDef('OPEN_DO', 'do(')
	
	def makeEQ(definition as String) as TokenDef?
		return StringTokenDef('EQ', '==')

	def makeNE(definition as String) as TokenDef?
		return StringTokenDef('NE', '<>')

	## end popular strings
	

	def afterStart is override
		base.afterStart
		# CC:
		# _tokenDefsByWhich['STRING_PART_SINGLE'].isActiveCall = def(tokenizer)=tokenizer.inSubstStringSingle
		# _tokenDefsByWhich['STRING_STOP_SINGLE'].isActiveCall = def(tokenizer)=tokenizer.inSubstStringSingle

		# recover from multiline comments
		while _tokenDefsByWhich.containsKey('COMMENT_BLOCK_LINE')
			.popTokenDefs
		_inCommentBlock = 0

		inactivate = [
			'RBRACKET_SPECIAL' ,
			'STRING_PART_SINGLE', 'STRING_STOP_SINGLE',
			'STRING_PART_DOUBLE', 'STRING_STOP_DOUBLE',
			'STRING_PART_FORMAT',
		]
		for which in inactivate
			_tokenDefsByWhich[which].isActive = false

	def isActiveCall(tok as TokenDef) as bool is override
		if tok.which=='STRING_PART_SINGLE' or tok.which=='STRING_STOP_SINGLE'
			return _inSubstStringSingle
		return true

	def _nextToken as IToken is override
		# overridden to deliver the final DEDENTS to close out indentation
		tok = base._nextToken
		if tok.isEOF
			colNum = 0
			while _indentCount > 0
				if colNum == 0, colNum = .lastToken.colNum + 1
				t = Token(.lastToken.fileName, .lastToken.lineNum, colNum, .lastToken.charNum, 'DEDENT', '', '')
				_tokenQueue.enqueue(t)
				_indentCount -= 1
				colNum += 1
			if _tokenQueue.count, return _nextToken
		return tok

	def onWHITESPACE_LINE(tok as IToken) as IToken?
		# Eat these.
		# Don't muck with perceived indentation level as
		# these kinds of lines are irrelevant.
		#print '<> onWHITESPACE_LINE'
		return nil

	def onWHITESPACE_COMMENT_1(tok as IToken) as IToken?
		#print '<> onWHITESPACE_COMMENT_1'
		if .checkForCommentDirective(tok)
			return .directiveToken(tok)
		else
			return .commentToken(tok)

	def onWHITESPACE_COMMENT_2(tok as IToken) as IToken?
		#print '<> onWHITESPACE_COMMENT_2'
		if .checkForCommentDirective(tok)
			return .directiveToken(tok)
		else
			return .commentToken(tok)
		return nil
	
	##
	## Comment out block
	##

	var _commentBlockDefs as List<of TokenDef>?	

	def onCOMMENT_BLOCK_START(tok as IToken) as IToken?
		#print '<> onCOMMENT_BLOCK_START', tok
		assert _inCommentBlock >= 0, tok
		# narrow the tokenizer's token defs to a new shorter set
		if _inCommentBlock == 0
			if _commentBlockDefs is nil
				defs = List<of TokenDef>()
				defs.add(.tokenDefsByWhich['SINGLE_LINE_COMMENT_BLOCK'])
				defs.add(.tokenDefsByWhich['COMMENT_BLOCK_START'])
				defs.add(TokenRegexDef('COMMENT_BLOCK_STOP', r'[^#]*\#\/.*$'))  # to-do: the .*$ portion looks wrong as in that text will never be processed
				defs.add(TokenRegexDef('COMMENT_BLOCK_LINE', '.*\n'))
				_commentBlockDefs = defs
			.pushTokenDefs(_commentBlockDefs to !)
		_inCommentBlock += 1
		return .commentToken(tok)
		
	def onCOMMENT_BLOCK_LINE(tok as IToken) as IToken?
		#print '<> onCOMMENT_BLOCK_LINE', tok.lineNum 
		assert _inCommentBlock > 0, tok
		return .commentToken(tok)
		
	def onCOMMENT_BLOCK_STOP(tok as IToken) as IToken?
		#print '<> onCOMMENT_BLOCK_STOP', tok.lineNum
		assert _inCommentBlock > 0, tok
		_inCommentBlock -= 1
		if _inCommentBlock == 0, .popTokenDefs
		return .commentToken(tok)

	def onINDENT_MIXED_TSNS(tok as IToken) as IToken?
		# expecting tabs, spaces, non-whitespace
		assert tok.text.startsWith('\t')
		assert tok.text.endsWith(' ')
		# this is okay on continued lines
		if .justDidLineContinuation
			indentLevel = tok.text.count(c'\t') + tok.text.count(c' ') // 4
			return _processNumIndentLevels(indentLevel)  # will check continuation indentation rules
		else
			return .onINDENT_MIXED_TS(tok)

	def onINDENT_MIXED_TS(tok as IToken) as IToken?
		sb = StringBuilder()
		for c in tok.text
			branch c
				on c' ', sb.append(r'[SPACE]')
				on c'\t', sb.append(r'[TAB]')
				else, sb.append(c)
		# to-do: for people using tabs a stray space is most common error here,
		# such as ...[TAB][SPACE][TAB]...
		# could probably detect this and invoke .recordError so compilation can continue.
		.throwError('Cannot mix tabs and spaces in indentation. [sb]...')
		return nil  # make compiler happy.

	def onINDENT_MIXED_ST(tok as IToken) as IToken?
		return .onINDENT_MIXED_TS(tok)

	def onINDENT_ALL_TABS(tok as IToken) as IToken?
		numTabs = tok.text.count(c'\t')
		return _processNumIndentLevels(numTabs)

	def onINDENT_ALL_SPACES(tok as IToken) as IToken?
		numSpaces = tok.text.count(c' ')
		if numSpaces % 4 and not .justDidLineContinuation # yes, 4. hard coded, intentionally.
			# TODO: should really just record an error and take (numSpaces/4).round as the indent
			.throwError('Space-based indentation must be a multiple of 4. This line has a remainder of [numSpaces%4].')
		return _processNumIndentLevels(numSpaces // 4)

	def onNO_INDENT(tok as IToken) as IToken?
		require tok.text==''
		_curTokenDef.ignoreCount = 1
		t = _processNumIndentLevels(0)
		return t

	def _processNumIndentLevels(numTabs as int) as IToken?
		if .justDidLineContinuation
			if numTabs < _indentCount
				.recordError('Must indent same amount or more on a continued line.')
			return nil
		firstTok as IToken?
		lastTok as IToken?
		while numTabs > _indentCount
			_indentCount += 1
			newTok = Token(_fileName, _lineNum, 1, _charNum, 'INDENT', '', '')
			if lastTok
				lastTok.nextToken = newTok
				lastTok = newTok
			else
				firstTok = lastTok = newTok
		if firstTok
			return firstTok
		while numTabs < _indentCount
			_indentCount -= 1
			newTok = Token(_fileName, _lineNum, 1, _charNum, 'DEDENT', '', '')
			if lastTok
				lastTok.nextToken = newTok
				lastTok = newTok
			else
				firstTok = lastTok = newTok
		return firstTok

	var _didLineContinuation as bool  # only meaningful after an EOL

	get justDidLineContinuation as bool
		return .lastToken.which == 'EOL' and _didLineContinuation

	def onEOL(tok as IToken) as IToken?
		_didLineContinuation = .lastToken.text == '_' and .lastToken.which == 'ID'
		return tok

	def onSINGLE_LINE_COMMENT_BLOCK(tok as IToken) as IToken?
		return .onSINGLE_LINE_COMMENT(tok)

	def onSINGLE_LINE_COMMENT(tok as IToken) as IToken?
		return if(.checkForCommentDirective(tok), .directiveToken(tok), .commentToken(tok))

	def onINLINE_COMMENT(tok as IToken) as IToken?
		return .commentToken(tok)

	def onAMBIGUOUS_COMMENT(tok as IToken) as IToken?
		.throwError('Ambiguous comment at /#. For an end-of-line comment, put a space between / and #. For an inline comment, end it with #/. For a block comment, put /# at the beginning of a line.')
		return .commentToken(tok)

	def onSPACE(tok as IToken) as IToken?
		# eat these
		return nil

	def onAT_ID(tok as IToken) as IToken?
		tok.value = tok.text[1:]
		return tok

	def onOPEN_CALL(tok as IToken) as IToken?
		tok.value = tok.text[:-1]
		return tok

	def onOPEN_GENERIC(tok as IToken) as IToken?
		require tok.text.trim.endsWith('<of')
		s = tok.text.trim
		tok.value = s[:-3]
		return tok

	def onID(tok as IToken) as IToken?
		tok = .keywordOrWhich(tok, 'ID')
		if tok.which<>'ID'
			tok.isKeyword = true
		return tok

	def onFLOAT_LIT_1(tok as IToken) as IToken?
		ensure
			result.which == 'FLOAT_LIT'
			(result.info to int) in [32, 64]  # CC: axe cast
		body
			s = tok.text.replace('_', '')
			if s.endsWith('f')
				size = 64
				s = s[:-1]
			else if s.endsWith('f32')
				size = 32
				s = s[:-3]
			else if s.endsWith('f64')
				size = 64
				s = s[:-3]
			else
				# cannot have other size specs given regex
				size = 64
			try
				tok.value = float.parse(s, Utils.cultureInfoForNumbers)
			catch FormatException
				assert false, 'not expecting to get here given regex'
			catch OverflowException
				.recordError('Range overflow for float literal "[tok.text]".')
			tok.which = 'FLOAT_LIT'
			tok.info = size
			return tok

	def onFLOAT_LIT_2(tok as IToken) as IToken?
		ensure
			result.which == 'FLOAT_LIT'
			(result.info to int) in [32, 64]  # CC: axe cast
		body
			return .onFLOAT_LIT_1(tok)

	def onDECIMAL_LIT(tok as IToken) as IToken?
		s = tok.text
		assert s.endsWith('d')
		s = s[:-1]
		s = s.replace('_', '')
		try
			tok.value = decimal.parse(s, Utils.cultureInfoForNumbers)
		catch FormatException
			assert false, 'not expecting to get here given regex'
		catch OverflowException
			.recordError('Range overflow for decimal literal "[tok.text]".')
		return tok

	def onNUMBER_LIT(tok as IToken) as IToken?
		require tok.text.endsWith('n')
		tok.which = 'FRACTIONAL_LIT'
		tok.text = tok.text[:-1]
		return .onFRACTIONAL_LIT(tok)

	def onFRACTIONAL_LIT(tok as IToken) as IToken?
		s = tok.text.replace('_', '')
		try
			assert _typeProvider
			numberType = if(.typeProvider, .typeProvider.numberType, DecimalType())
			# parse literal to same type as numberType
			if numberType inherits DecimalType
				tok.value = decimal.parse(s, Utils.cultureInfoForNumbers)
			else if numberType inherits FloatType
				tok.value = float.parse(s, Utils.cultureInfoForNumbers)
				tok.which = 'FLOAT_LIT'
				tok.info = numberType.size
			else
				throw FallThroughException(numberType)
		catch FormatException
			assert false, 'not expecting to get here given regex'
		catch OverflowException
			.recordError('[numberType.name.capitalized] range overflow for fractional literal "[tok.text]".')
		return tok

	def onINTEGER_LIT(tok as IToken) as IToken?
		try
			val = uint64.parse(tok.text.replace('_', ''), Utils.cultureInfoForNumbers)
		catch FormatException
			assert false, 'not expecting to get here given regex'
		catch OverflowException
			.recordError('Range overflow for integer literal "[tok.text]".')
			return tok
		_inferIntSize(tok, val)
		return tok

	def onINTEGER_LIT_EXPLICIT(tok as IToken) as IToken?
		require
			'i' in tok.text or 'u' in tok.text
		ensure
			tok.which == 'INTEGER_LIT'
			(tok.info to int) in [-8, 8, -16, 16, -32, 32, -64, 64]  # CC: axe cast
		body
			s = tok.text.replace('_', '')
			for c in s, if not c.isDigit, break
			# c will be 'i' or 'u'
			if s[s.length-1] == c
				size = 32
				s = s[:-1]
			else if s.endsWith('32')
				size = 32
				s = s[:-3]
			else if s.endsWith('64')
				size = 64
				s = s[:-3]
			else if s.endsWith('16')
				size = 16
				s = s[:-3]
			else if s.endsWith('8')
				size = 8
				s = s[:-2]
			else
				# cannot have other size specs given regex
				size = 32
			try
				tok.value = uint64.parse(s, Utils.cultureInfoForNumbers)
			catch FormatException
				assert false, 'not expecting to get here given regex'
			catch OverflowException
				.recordError('Range overflow for integer literal "[tok.text]".')
			tok.which = 'INTEGER_LIT'
			tok.info = if(c == c'i', -1, +1) * size
			return tok

	def onHEX_LIT_UNSIGN(tok as IToken) as IToken? 
		require
			'u' in tok.text
		body
			return .onHEX_LIT_EXPLICIT(tok)
	
	def onHEX_LIT_EXPLICIT(tok as IToken) as IToken? 
		ensure
			tok.which == 'INTEGER_LIT'
			(tok.info to int) in [8, 16, 32, 64]  # CC: axe cast
		body
			size = 32
			h = tok.text
			s = tok.text
			if s.endsWith('32')
				size = 32
				s = s[:-2]
			else if s.endsWith('64')
				size = 64
				s = s[:-2]
			else if s.endsWith('16')
				size = 16
				s = s[:-2]
			else if s.endsWith('8')
				size = 8
				s = s[:-1]
			if s.endsWith('u')
				s = s[:-1] 
			s = s.replace('_', '')
			tok.text = s
			tok = .onHEX_LIT(tok) to !
			tok.info = size  # unsigned
			tok.text = h
			return tok

	def onHEX_LIT(tok as IToken) as IToken? 
		ensure
			tok.which == 'INTEGER_LIT'
			(tok.info to int) in [-32, 32, -64, 64]  # CC: axe cast
		body
			try
				val = uint64.parse(tok.text[2:], System.Globalization.NumberStyles.HexNumber)
			catch FormatException
				assert false, 'not expecting to get here given regex'
			catch OverflowException
				.recordError('Range overflow for hex literal "[tok.text]".')
			tok.which = 'INTEGER_LIT'
			_inferIntSize(tok, val)
			return tok

	def _inferIntSize(tok as IToken, val as uint64)
		if val <= int.maxValue
			tok.value = val to int
			tok.info = -32
		else if val <= uint.maxValue
			tok.value = val to uint
			tok.info = 32
		else if val <= int64.maxValue
			tok.value = val to int64
			tok.info = -64
		else
			tok.value = val
			tok.info = 64

	def onINT_SIZE(tok as IToken) as IToken?
		size = int.parse(tok.text[3:])
		tok.value = size
		return tok

	def onUINT_SIZE(tok as IToken) as IToken?
		size = int.parse(tok.text[4:])
		tok.value = size
		return tok

	def onFLOAT_SIZE(tok as IToken) as IToken?
		size = int.parse(tok.text[5:])
		tok.value = size
		return tok

	def onCHAR_LIT_SINGLE(tok as IToken) as IToken?
		return _onCharLit(tok)

	def onCHAR_LIT_DOUBLE(tok as IToken) as IToken?
		return _onCharLit(tok)

	def _onCharLit(tok as IToken) as IToken?
		require tok.text.startsWith('c')
		s = tok.text[2:-1]
		assert s.length==1 or s.length==2
		tok.value = s
		return tok


	##
	## String substitution handling
	##

	def tokValueForString(s as String) as String
		"""
		Utility class for onSTRING_START|PART|STOP_SINGLE|DOUBLE.
		"""
		require
			s.length >= 2  # CC: #s[0] in [c'"', c"'"] #s[s.length-1] in [c'"', c"'"]
		body
			s = s.substring(1, s.length-2)
			chars = StringBuilder(s.length)
			last = c'\0'
			next as char?
			for c in s
				next = nil
				if last == c'\\'
					branch c
						on c'a', next = c'\a'
						on c'b', next = c'\b'
						on c'f', next = c'\f'
						on c'n', next = c'\n'
						on c'r', next = c'\r'
						on c't', next = c'\t'
						on c'v', next = c'\v'
						on c"'", next = c"'"
						on c'"', next = c'"'
						on c'?', next = c'?'
						on c'0', next = c'\0'
						on c'\\'
							chars.append(c'\\')
							# cannot have `last` being a backslash anymore--it's considered consumed now
							last = c'\0'
							continue
						else, next = c # TODO: should probably be error: Invalid escape sequence
				else if c <> '\\'
					next = c
				if next is not nil
					chars.append(next)
				last = c
			return chars.toString

	def onSTRING_START_SINGLE(tok as IToken) as IToken
		require not _inSubstStringSingle
		_inSubstStringSingle = true
		tok.value = .tokValueForString(tok.text)
		_tokenDefsByWhich['STRING_PART_SINGLE'].isActive = true
		_tokenDefsByWhich['STRING_STOP_SINGLE'].isActive = true
		_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = true
		return tok

	def onSTRING_PART_SINGLE(tok as IToken) as IToken
		require _inSubstStringSingle
		tok.value = .tokValueForString(tok.text)
		return tok

	def onSTRING_STOP_SINGLE(tok as IToken) as IToken
		require _inSubstStringSingle
		_inSubstStringSingle = false
		tok.value = .tokValueForString(tok.text)
		_tokenDefsByWhich['STRING_PART_SINGLE'].isActive = false
		_tokenDefsByWhich['STRING_STOP_SINGLE'].isActive = false
		_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = false
		return tok


	def onSTRING_START_DOUBLE(tok as IToken) as IToken
		require not _inSubstStringDouble
		_inSubstStringDouble = true
		tok.value = .tokValueForString(tok.text)
		_tokenDefsByWhich['STRING_PART_DOUBLE'].isActive = true
		_tokenDefsByWhich['STRING_STOP_DOUBLE'].isActive = true
		_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = true
		return tok

	def onSTRING_PART_DOUBLE(tok as IToken) as IToken
		require _inSubstStringDouble
		tok.value = .tokValueForString(tok.text)
		return tok

	def onSTRING_STOP_DOUBLE(tok as IToken) as IToken
		require _inSubstStringDouble
		_inSubstStringDouble = false
		tok.value = .tokValueForString(tok.text)
		_tokenDefsByWhich['STRING_PART_DOUBLE'].isActive = false
		_tokenDefsByWhich['STRING_STOP_DOUBLE'].isActive = false
		_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = false
		return tok


	def onLBRACKET(tok as IToken) as IToken
		if _inSubstStringSingle or _inSubstStringDouble
			_substLBracketCount += 1
			if _substLBracketCount==1
				_tokenDefsByWhich['RBRACKET_SPECIAL'].isActive = true
				assert _tokenDefsByWhich['STRING_PART_FORMAT'].isActive
				_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = false
		return tok

	def onRBRACKET_SPECIAL(tok as IToken) as IToken
		require
			_inSubstStringSingle or _inSubstStringDouble
			_substLBracketCount
		body
			_substLBracketCount -= 1
			if _substLBracketCount == 0
				_tokenDefsByWhich['RBRACKET_SPECIAL'].isActive = false
				assert not _tokenDefsByWhich['STRING_PART_FORMAT'].isActive
				_tokenDefsByWhich['STRING_PART_FORMAT'].isActive = true
			tok.which = 'RBRACKET'  # tricky, tricky. the parser never sees an RBRACKET_SPECIAL
			return tok


	##
	## Doc Strings
	##

	def onDOC_STRING_START(tok as IToken) as IToken
		assert not _inDocString
		# narrow the tokenizer's token defs to a new shorter set
		# TODO: cache the tokens below
		t = List<of TokenDef>()
		t.add(TokenRegexDef('DOC_STRING_STOP', r'[ \t]*"""[ \t]*\n'))
		t.add(TokenRegexDef('DOC_STRING_BAD_STOP', r'[ \t]*"""(.+)\n'))
		t.add(TokenRegexDef('DOC_STRING_BODY_TEXT', '.*\n'))
		.pushTokenDefs(t)
		_inDocString = true
		return tok

	def onDOC_STRING_STOP(tok as IToken) as IToken
		assert _inDocString, tok
		_inDocString = false
		.popTokenDefs
		return tok

	def onDOC_STRING_BAD_STOP(tok as IToken) as IToken
		assert _inDocString, tok
		_inDocString = false
		.popTokenDefs
		.recordError('Incorrectly finished multi-line comment.')
		tok.which = 'DOC_STRING_STOP'
		return tok

	def onDOC_STRING_BODY_TEXT(tok as IToken) as IToken
		assert _inDocString, tok
		return tok

	def onDOC_STRING_LINE(tok as IToken) as IToken
		tok.value = tok.text.trim[3:-3].trim
		return tok

	##
	## Simple string literals
	##

	def onSTRING_RAW_SINGLE(tok as IToken) as IToken
		require tok.text.startsWith('r')
		tok.value = tok.text.substring(2, tok.text.length-3)
		tok.which = 'STRING_SINGLE'
		return tok

	def onSTRING_RAW_DOUBLE(tok as IToken) as IToken
		require tok.text.startsWith('r')
		tok.value = tok.text.substring(2, tok.text.length-3)
		tok.which = 'STRING_DOUBLE'
		return tok

	def onSTRING_NOSUB_SINGLE(tok as IToken) as IToken
		require tok.text.startsWith('ns')
		tok.value = .tokValueForString(tok.text.substring(2))
		tok.which = 'STRING_SINGLE'
		return tok

	def onSTRING_NOSUB_DOUBLE(tok as IToken) as IToken
		require tok.text.startsWith('ns')
		tok.value = .tokValueForString(tok.text.substring(2))
		tok.which = 'STRING_DOUBLE'
		return tok

	def onSTRING_SINGLE(tok as IToken) as IToken
		tok.value = .tokValueForString(tok.text)
		return tok

	def onSTRING_DOUBLE(tok as IToken) as IToken
		tok.value = .tokValueForString(tok.text)
		return tok


	##
	## Self util
	##

	var _directiveRE = Regex(r'#\s?\.([\w\-]+)\.($|\s)', RegexOptions.Compiled)

	def checkForCommentDirective(tok as IToken) as bool
		# check for .no-warnings.
		reMatch = _directiveRE.match(tok.text)
		if reMatch.success
			tok.which = 'DIRECTIVE'
			name = reMatch.groups[1].value
			branch name
				on 'no-warnings', .addNoWarning(tok)
				# for testify
				on 'args', pass  # TODO: actually this could be worth implementing outside of testify
				on 'compile-only', pass
				on 'error', pass
				on 'multi', pass
				on 'multipart', pass
				on 'require', pass
				on 'skip', pass
				on 'warning', pass
				on 'warning-lax', pass
				else, .throwError('Unrecognized compiler directive "[name]".')
			return true
		return false

	def commentToken(tok as IToken) as IToken?
		if .willReturnComments
			tok.which = 'COMMENT'
			return tok
		else
			return nil

	def directiveToken(tok as IToken) as IToken?
		return if(.willReturnDirectives, tok, nil)


###
### Custom Token Defs
###


class CommentBlockStartTokenDef inherits TokenDef
	"""
	^[ \t]*\/\#.*$
	"""

	test
		tokDef = CommentBlockStartTokenDef('')
		cases = [
			'/# ...',
			'/#',
			'/##',
			'/## /',
		]
		for case in cases
			assert tokDef.match(case)
			assert tokDef.match(' ' + case)
			assert tokDef.match(case + ' ')
			assert tokDef.match(case + '\n')
			assert tokDef.match(case + ' \n')
		cases = [
			'/ # ...',
		]
		for case in cases
			assert tokDef.match(case) is nil

	cue init(definition as String)
		base.init('COMMENT_BLOCK_START')
		_requiresBOL = true

	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [c'\t', c' ', c'/']
	
	def _match(input as String) as TokenMatch? is override
		state = 0
		for i in input.length
			c = input[i]
			branch state
				on 0  # [ \t]*
					if c == c' ' or c == c'\t', pass
					else if c == c'/', state = 1
					else, return nil
				on 1  # \#.*$
					if c == c'#', state = 2
					else, return nil
				on 2  # .*$
					if c == c'\n', return TokenMatch(input.substring(0, i))
		if state == 2, return TokenMatch(input)
		else, return nil


class IdTokenDef inherits TokenDef
	"""
	[A-Za-z_][A-Za-z0-9_]*
	"""

	test
		tokDef = IdTokenDef('OPEN_CALL', '')
		assert tokDef.match('x').text == 'x'
		assert tokDef.match('foo').text == 'foo'
		assert tokDef.match('Foo').text == 'Foo'
		assert tokDef.match('Foo ').text == 'Foo'
		assert tokDef.match('Foo aoeu').text == 'Foo'
		assert tokDef.match('1234') is nil
		assert tokDef.match('(') is nil

	cue init(which as String, definition as String)
		base.init(which)
	
	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		t = List<of char>()
		for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_', t.add(c)
		return t
	
	def _match(input as String) as TokenMatch? is override
		state = 0
		for i in input.length
			c = input[i]
			branch state
				on 0  # [A-Za-z_]
					if (c >= c'A' and c <= c'Z') or (c >= c'a' and c <= c'z') or (c == c'_'), state = 1
					else
						return nil
				on 1  # [A-Za-z0-9_]*
					if (c >= c'A' and c <= c'Z') or (c >= c'a' and c <= c'z') or (c == c'_') _
						or (c >= c'0' and c <= c'9'), pass
					else, return TokenMatch(input.substring(0, i))
		return TokenMatch(input.substring(0, i))


class IndentAllTokenDef inherits TokenDef
	"""
	r'INDENT_ALL_TABS		^[\t]+',
	r'INDENT_ALL_SPACES		^[ ]+',
	"""

	test
		tokDef = IndentAllTokenDef('INDENT_ALL_SPACES', c' ', '')
		cases = [' ', '  ', '    ', ' x', '   foo']
		for case in cases
			match = tokDef.match(case)
			assert match
			assert match.text.trim == ''
		assert not tokDef.match('x  ')

	var _ch as char
	
	cue init(which as String, ch as char, definition as String)
		base.init(which)
		_ch, _requiresBOL = ch, true

	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [_ch]

	def _match(input as String) as TokenMatch? is override
		ch = _ch
		for i in input.length
			if input[i] == ch, pass
			else, break
		if i > 0, return TokenMatch(.which, input.substring(0, i))
		else, return nil


class IndentMixedTokenDef inherits TokenDef
	"""
	1	r'INDENT_MIXED_TSNS		^[\t]+[ ]+(?=[^\t ])',
	2	r'INDENT_MIXED_TS		^[\t]+[ ]+',
	"""

	test
		tokDef = IndentMixedTokenDef(1, '')
		cases = [
			'\t x',
			'\t\t foo',
			'\t   bar',
		]
		for case in cases
			match = tokDef.match(case)
			assert match, case
			assert match.which == 'INDENT_MIXED_TSNS'
			assert match.text.trim == ''
		tokDef = IndentMixedTokenDef(2, '')
		cases = [
			'\t x',
			'\t\t foo',
			'\t   bar',
		]
		for case in cases
			match = tokDef.match(case)
			assert match
			assert match.which == 'INDENT_MIXED_TS'
			assert match.text.trim == ''

	var _num as int

	cue init(num as int, definition as String)
		require num in [1, 2]
		base.init('INDENT_MIXED_'+if(num==1, 'TSNS', 'TS'))
		_num, _requiresBOL = num, true

	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [c'\t']
	
	def _match(input as String) as TokenMatch? is override
		state = 0
		for i in input.length
			c = input[i]
			branch state
				on 0  # [\t]+
					if c <> c'\t', return nil
					else, state = 2
				on 2  # [\t]+[ ]+
					if c == c'\t', pass
					else if c == c' ', state = 3
					else, return nil
				on 3  # [ ]+
					if c == c' '
						pass
					else
						if _num == 1  # (?=[^\t ])
							if c <> c'\t' and c <> c' '
								return TokenMatch(.which, input.substring(0, i))
							else
								return nil
						else
							return TokenMatch(.which, input.substring(0, i))
		return nil


class OpenComboTokenDef inherits TokenDef
	"""
	Combines these which appear sequentially in the token list:
	
		OpenGenericTokenDef('OPEN_GENERIC', r'[A-Za-z_][A-Za-z0-9_]*<of([ \n\r\t]|(?=[,>]))'),
		r'OPEN_DO				s	do(',
		r'OPEN_IF				s	if(',
		OpenCallTokenDef('OPEN_CALL', r'[A-Za-z_][A-Za-z0-9_]*\('),
	"""

	test
		tokDef = OpenComboTokenDef()
		# OPEN_GENERIC
		cases = ['Foo<of ', 'Foo<of>', 'Foo<of,>']
		for case in cases
			match = tokDef.match(case)
			assert match and match.which == 'OPEN_GENERIC' and match.text.trim.endsWith('<of')
		# OPEN_DO
		match = tokDef.match('do(')
		assert match and match.which == 'OPEN_DO'
		# OPEN_IF
		match = tokDef.match('if(')
		assert match and match.which == 'OPEN_IF'
		# OPEN_CALL
		cases = ['x(', 'foo(', 'Foo(']
		for case in cases
			match = tokDef.match(case)
			assert match and match.which == 'OPEN_CALL'
		# negative cases
		cases = ['foo', 'Foo<off ', '<of', 'Foo<fo', '<', '(foo', '(']
		for case in cases, assert tokDef.match(case) is nil
	
	cue init
		base.init('OpenComboTokenDef')
	
	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		t = List<of char>()
		for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_', t.add(c)
		return t

	shared
	
		var _spaces = @[c' ', c'\n', c'\r', c'\t']

	def _match(input as String) as TokenMatch? is override
		# time savings here included:
		#  1. eliminating use of StringBuilder() and instead returning input.substring(0, i+1)
		#  2. switch from: c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
		#              to: (c >= c'A' and c <= c'Z') or (c >= c'a' and c <= c'z') or (c == c'_')
		state = 0
		for i in input.length
			c = input[i]
			branch state
				on 0  # [A-Za-z_]
					if (c >= c'A' and c <= c'Z') or (c >= c'a' and c <= c'z') or (c == c'_'), state = 1
					else, return nil
				on 1  # [A-Za-z0-9_]*
					if (c >= c'A' and c <= c'Z') or (c >= c'a' and c <= c'z') or (c == c'_') _
						or (c >= c'0' and c <= c'9'), pass
					else if c == c'<', state = 2
					else if c == c'('
						text = input.substring(0, i+1)
						if text == 'do(', return TokenMatch('OPEN_DO', text)
						else if text == 'if(', return TokenMatch('OPEN_IF', text)
						else, return TokenMatch('OPEN_CALL', text)
					else, return nil
				on 2  # o - <of
					if c == c'o', state = 3
					else, return nil
				on 3  # f - <of
					if c == c'f', state = 4
					else, return nil
				on 4  # ([ \n\r\t]|(?=[,>]))'
					if c in _spaces
						return TokenMatch('OPEN_GENERIC', input.substring(0, i+1))
					else if c == c',' or c == c'>' or i == input.length-1
						# positive lookahead - (?=[,>])
						# so don't add to sb
						return TokenMatch('OPEN_GENERIC', input.substring(0, i))
					else
						return nil
		return nil


class SingleLineCommentBlockTokenDef inherits TokenDef
	"""
	^[ \t]*\/\#.*\#\/[ \t]*$
	"""

	test
		tokDef = SingleLineCommentBlockTokenDef('')
		cases = [
			'/# ... #/',
			'/##/',
			'/######/',
		]
		for case in cases
			assert tokDef.match(case)
			assert tokDef.match(' ' + case)
			assert tokDef.match(case + ' ')
			assert tokDef.match(case + '\n')
			assert tokDef.match(case + ' \n')
		cases = [
			'/# ...',
			'/ # ... ',
			'/# ... # /',
			'/# ...\n...#/',
		]
		for case in cases
			assert tokDef.match(case) is nil

	cue init(definition as String)
		base.init('SINGLE_LINE_COMMENT_BLOCK')
		_requiresBOL = true

	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [c'\t', c' ', c'/']
	
	def _match(input as String) as TokenMatch? is override
		state, lenM1 = 0, input.length - 1
		for i in input.length
			c = input[i]
			branch state
				on 0  # [ \t]*
					if c == c' ' or c == c'\t', pass
					else if c == c'/', state = 1
					else, return nil
				on 1  # \#.*\#\/[ \t]*$
					if c == c'#', state = 2
					else, return nil
				on 2  # .*\#\/[ \t]*$
					if c == c'\n', return nil
					else if c == c'#' and i < lenM1 and input[i+1] == c'/', state = 3
				on 3  # \/[ \t]*$
					assert c == c'/'
					state = 4
				on 4
					if c == c' ' or c == c'\t', pass
					else if c == c'\n', return TokenMatch(input.substring(0, i+1))
					else, return nil
		if state == 2, return nil
		assert state == 3 or state == 4
		return TokenMatch(input)


class SpaceTokenDef inherits TokenDef
	
	test
		# .timeIt
		pass
		
	def timeIt is shared
		input1, input2 = '\t\tx = 5', '# foo'
		reps = 10_000_000

		re = Regex(r'[ \t]+', RegexOptions.Compiled)
		sw = System.Diagnostics.Stopwatch()
		sw.start
		for i in reps
			re.match(input1)
			re.match(input2)
		sw.stop
		timeRE = sw.elapsedMilliseconds

		td = SpaceTokenDef('SPACE', r'[ \t]+')
		sw = System.Diagnostics.Stopwatch()
		sw.start
		for i in reps
			td.match(input1)
			td.match(input2)
		sw.stop
		timeTD = sw.elapsedMilliseconds
		
		ratio = timeRE / timeTD
		trace timeRE, timeTD, ratio

		# trace: timeRE=6875 (Int64); timeTD=462 (Int64); ratio=14.88 (Decimal);
		# so at least for SpaceTokenDef, its .match is more than 14 X faster than a compiled regex!

	cue init(which as String, definition as String)
		base.init(which)
		assert definition == r'[ \t]+'
	
	get length as int is override
		throw Exception('ordered token')

	get firstChars as List<of char> is override
		return [c' ', c'\t']

	def _match(input as String) as TokenMatch? is override
		for i in input.length
			c = input[i]
			if c <> c' ' and c <> c'\t', break
		return if(i==0, nil, TokenMatch(input.substring(0, i)))


class StringLiteralTokenDef inherits TokenDef
	"""
	STRING_SINGLE	'(?:\\.?|[^'\n])*'
	STRING_DOUBLE	"(?:\\.?|[^"\n])*"
	"
	"""

	test			
		cases = [
			r'foo',
			r'',
			r'foo\n',
			r'foo\nbar',
			r'...',
		]
		for delimeter in ['"', "'"]
			tokDef = StringLiteralTokenDef('STRING_FOO', delimeter[0])
			for case in cases
				if case.length, assert tokDef.match(case) is nil  # no delimeters have been added to the case yet
				case = delimeter + case + delimeter
				match = tokDef.match(case)
				assert match, {'case': case, 'tokDef': tokDef}
				assert match.text.startsWith(delimeter) and match.text.endsWith(delimeter)
				assert match.text.length == case.length

	cue init(which as String, delimiter as char)
		require delimiter in [c'"', c"'"]
		base.init(which)
		_delimiter = delimiter
	
	get delimiter from var as char

	def _innerToString as String is override
		return base._innerToString + ', [.delimiter.toTechString]'

	get length as int is override
		throw Exception('ordered token')

	get firstChars as List<of char> is override
		return [_delimiter]
	
	def _match(input as String) as TokenMatch? is override
		state = 0
		for i in input.length
			c = input[i]
			branch state
				on 0
					if c == _delimiter, state = 1
					else, return nil
				on 1  # inside string
					if c == _delimiter, return TokenMatch(input.substring(0, i+1))
					else if c == c'\\', state = 2
					else if c <> '\n' and c <> _delimiter, pass
					else, return nil
				on 2  # \x
					state = 1
		return nil


class AbstractStringThingTokenDef inherits TokenDef is abstract
	"""
	Regexes were:
		r"STRING_START_SINGLE	'[^'\n\[]*\[",
		r"STRING_PART_SINGLE	\][^'\n\[]*\[",
		r"STRING_STOP_SINGLE	\][^'\n\[]*'",

		r'STRING_START_DOUBLE	"[^"\n\[]*\[',
		r'STRING_PART_DOUBLE	\][^"\n\[]*\[',
		r'STRING_STOP_DOUBLE	\][^"\n\[]*"',

	But there is also the need to escape left brackets which turned out difficult to express in
	regexes. I did manage to do it, but then the regexes became *extremely* slow.
	"""

	var _quote as char
	
	cue init(which as String, quote as char)
		base.init(which)
		_quote = quote

	get length as int is override
		throw Exception('ordered token')

	def _match(input as String) as TokenMatch? is override
		s = _match2(input)
		if s, return TokenMatch(s)
		else, return nil

	def _match2(input as String) as String? is abstract

	def _matchBetween(input as String, startch as char, stopch as char, breakch as char) as String?
		"""
		Matches between a range of characters.
		"""
		if input[0] <> startch, return nil
		sb = StringBuilder(input[0].toString)
		isEscaped = false
		for i in 1 : input.length
			c = input[i]
			if c == breakch and not isEscaped, return nil
			if c == c'\n', return nil
			sb.append(c)
			if isEscaped
				isEscaped = false
			else
				if c == stopch, return sb.toString
				if c == c'\\', isEscaped = true
		return nil
		
	def toString as String is override
		return '[.getType.name]([CobraCore.toTechString(_quote)])'


class StringStartTokenDef inherits AbstractStringThingTokenDef
	
	cue init(which as String, quote as char)
		base.init(which, quote)

	get firstChars as List<of char> is override
		return [_quote]

	def _match2(input as String) as String? is override
		test
			x = StringStartTokenDef('foo', c"\'")
			# normal:
			assert x._match2('aoeu') is nil
			assert x._match2(r"'foo [bar]") == r"'foo ["
			# odd:
			assert x._match2(r"'foo[") == r"'foo["
			# escaped:
			assert x._match2(r"'foo\[ foo[") == r"'foo\[ foo["
			assert x._match2(r"'foo\\[foo]") == r"'foo\\["
			assert x._match2(r"'foo\\\[ foo[") == r"'foo\\\[ foo["
			# not this token def:
			assert x._match2(r"]foo [bar]") is nil
			assert x._match2(r"]foo[") is nil
			assert x._match2(r"]foo[") is nil
			assert x._match2(r"]foo' + ") is nil
			assert x._match2(r"]foo ' bah blah") is nil
			assert x._match2(r"]foo\[ foo'") is nil
		body
			return _matchBetween(input, _quote, c'[', _quote)


class StringPartTokenDef inherits AbstractStringThingTokenDef
	
	cue init(which as String, quote as char)
		base.init(which, quote)

	get firstChars as List<of char> is override
		return [c']']

	def _match2(input as String) as String? is override
		test
			x = StringPartTokenDef('foo', c"\'")
			# normal:
			assert x._match2('aoeu') is nil
			assert x._match2(r"]foo [bar]") == r"]foo ["
			# odd:
			assert x._match2(r"]foo[") == r"]foo["
			# escaped:
			assert x._match2(r"]foo\[ foo[") == r"]foo\[ foo["
			assert x._match2(r"]foo\\[foo]") == r"]foo\\["
			assert x._match2(r"]foo\\\[ foo[") == r"]foo\\\[ foo["
			# not this token def:
			assert x._match2(r"'foo [bar]") is nil
			assert x._match2(r"'foo[") is nil
			assert x._match2(r"'foo\[ foo[") is nil
			assert x._match2(r"]foo' + ") is nil
			assert x._match2(r"]foo ' bah blah") is nil
			assert x._match2(r"]foo\[ foo'") is nil
		body
			return _matchBetween(input, c']', c'[', _quote)


class StringStopTokenDef inherits AbstractStringThingTokenDef
	
	cue init(which as String, quote as char)
		base.init(which, quote)

	get firstChars as List<of char> is override
		return [c']']

	def _match2(input as String) as String? is override
		test
			x = StringStopTokenDef('foo', c"\'")
			# normal:
			assert x._match2('aoeu') is nil
			assert x._match2(r"]foo' + ") == r"]foo'"
			assert x._match2(r"] '") == r"] '"
			# odd:
			assert x._match2(r"]foo ' bah blah") == r"]foo '"
			# escaped:
			assert x._match2(r"]foo\[ foo'") == r"]foo\[ foo'"
			assert x._match2(r"]foo\\\[ foo'") == r"]foo\\\[ foo'"
			# not this token def:
			assert x._match2(r"'foo [bar]") is nil
			assert x._match2(r"'foo[") is nil
			assert x._match2(r"'foo\[ foo[") is nil
			assert x._match2(r"]foo[") is nil
			assert x._match2(r"'foo [bar]") is nil
			assert x._match2(r"'foo[") is nil
			assert x._match2(r"]foo\\[foo]") is nil
		body
			return _matchBetween(input, c']', _quote, c'[' )


class WhiteSpaceCommentTokenDef inherits TokenDef
	"""
	r'WHITESPACE_COMMENT_1	^[\t]+[ ]*\#.*$',
	r'WHITESPACE_COMMENT_2	^[ ]+[\t]*\#.*$',
	to-do: collapse this into one tokendef rather than a '1' and a '2'. set the TokenMatch .which
	"""
	
	test
		tokDefs = [WhiteSpaceCommentTokenDef(1, ''), WhiteSpaceCommentTokenDef(2, '')]
		cases = [
			[1, '\t\t#foo'],
			[1, '\t\t  #foo'],
			[2, '  #foo'],
			[2, '  \t\t#foo'],
		]
		for num, case in cases
			assert tokDefs[num-1].match(case)
		assert tokDefs[0].match('\t\tfoo') is nil
		assert tokDefs[0].match('\t\t \tfoo') is nil

	var _num as int

	cue init(num as int, definition as String)
		require num in [1, 2]
		base.init('WHITESPACE_COMMENT_'+num.toString)
		_num, _requiresBOL = num, true

	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [c'\t', c' ']

	def _match(input as String) as TokenMatch? is override
		state = if(_num==1, 1, 4)
		for i in input.length
			c = input[i]
			branch state
				# _num == 1
				on 1  # [\t]+
					if c <> c'\t', return nil
					else, state = 2
				on 2  # [\t]+\#
					if c == c'\t', pass
					else if c == c'#', state = 8
					else if c == c' ', state = 3
					else, return nil
				on 3  # [ ]*\#
					if c == c' ', pass
					else if c == c'#', state = 8
					else, return nil

				# _num == 2
				on 4  # [ ]+
					if c <> c' ', return nil
					else, state = 5
				on 5  # [ ]+\#
					if c == c' ', pass
					else if c == c'#', state = 8
					else if c == c'\t', state = 6
					else, return nil
				on 6  # [\t]*\#
					if c == c'\t', pass
					else if c == c'#', state = 8
					else, return nil
				
				# either num
				on 8  # .*$
					if c == c'\n', return TokenMatch(input.substring(0, i))
		if state == 8, return TokenMatch(input)
		else, return nil


class WhiteSpaceLineTokenDef inherits TokenDef
	"""
	^[\t ]+$
	"""

	test
		tokDef = WhiteSpaceLineTokenDef('')
		cases = [
			' ', '  ', '\t  ',
			'\t', '\t\t', ' \t\t'
		]
		for case in cases
			assert tokDef.match(case)
			assert tokDef.match(case+'\n')
		assert tokDef.match('    foo') is nil
		assert tokDef.match('foo') is nil

	cue init(definition as String)
		base.init('WHITESPACE_LINE')
		_requiresBOL = true
	
	get length as int is override
		throw Exception('ordered token')
	
	get firstChars as List<of char> is override
		return [c'\t', c' ']
	
	def _match(input as String) as TokenMatch? is override
		for i in input.length
			c = input[i]
			# [\t ]+
			if c == c'\t' or c == c' '
				pass
			else if c == '\n'
				return TokenMatch(input.substring(0, i))
			else
				return nil
		return TokenMatch(input.substring(0, i))