Ticket #120: bigraw.patch
File bigraw.patch, 15.3 KB (added by hopscc, 15 years ago) |
---|
-
Source/CobraTokenizer.cobra
# Patched regions of Source/CobraTokenizer.cobra. Everything below lives inside the
# tokenizer class. A line reading "# ..." marks unchanged source that the patch view skips.

	var _inSubstStringDouble = false
	var _inDocString = false
	var _inCommentBlock = 0
	# true from the opening line of a multiline big raw string until its closing quote
	var _inBigRawString = false
	# indent level stripped from big raw lines; -1 until the first continuation line is seen
	var _bigRawIndent = -1
	# true when the big raw string opened with '+|' (lines are bounded by '|' bars)
	var _isBigRawBounded = false

	cue init
		base.init

# ...

		sb.append('_inSubstStringDouble=[_inSubstStringDouble], ')
		sb.append('_inDocString=[_inDocString]')
		sb.append('_inCommentBlock=[_inCommentBlock]')
		sb.append('_inBigRawString=[_inBigRawString]')

	pro willReturnComments from var as bool

# ...

			r"STRING_RAW_SINGLE r'(?:\\.?|[^'\n])*'",
			r'STRING_RAW_DOUBLE r"(?:\\.?|[^"\n])*"',

			# multiline raw strings
			# (the _ML forms end in a newline instead of the closing quote)
			r"STRING_BIGRAW_SINGLE_ML R'(?:\\.?|[^'\n])*\n",
			r'STRING_BIGRAW_DOUBLE_ML R"(?:\\.?|[^"\n])*\n',

			r"STRING_BIGRAW_SINGLE R'(?:\\.?|[^'\n])*'",
			r'STRING_BIGRAW_DOUBLE R"(?:\\.?|[^"\n])*"',

			# substituted strings
			r'RBRACKET_SPECIAL ]',
			r"STRING_START_SINGLE '", # see "def makeSTRING_FOO_BAR"
			r"STRING_PART_SINGLE '",
			r"STRING_STOP_SINGLE '",

# ...

		t.add(TokenRegexDef('DOC_STRING_BODY_TEXT', '.*\n'))
		.pushTokenDefs(t)
		_inDocString = true
		tok.value = tok.text.trim[3:].trim
		return tok

	def onDOC_STRING_STOP(tok as IToken) as IToken

# ...

		tok.which = 'STRING_DOUBLE'
		return tok

	def onSTRING_BIGRAW_SINGLE_ML(tok as IToken) as IToken
		"""
		Opening line of a multiline big raw string delimited by single quotes.
		"""
		return .doSTRING_BIGRAW_START(tok, 'SINGLE')

	def onSTRING_BIGRAW_DOUBLE_ML(tok as IToken) as IToken
		"""
		Opening line of a multiline big raw string delimited by double quotes.
		"""
		return .doSTRING_BIGRAW_START(tok, 'DOUBLE')

	def doSTRING_BIGRAW_START(tok as IToken, which as String) as IToken
		"""
		Begin a multiline big raw string.

		Pushes a two-entry token definition set (STRING_BIGRAW_STOP, STRING_BIGRAW_TEXT),
		marks the tokenizer as being inside a big raw string and returns `tok` retagged as
		the first STRING_BIGRAW_TEXT piece, with the leading R and quote dropped from its value.
		`which` is 'SINGLE' or 'DOUBLE' and picks the closing quote the stop pattern looks for.
		If the remaining text is exactly '+|' plus a newline, the string is flagged as
		bar-bounded and this first piece contributes an empty value.
		"""
		require tok.text.startsWith('R')
		assert not _inBigRawString
		bigRawStopPtn = r"(?:\\.?|[^'\n])*'"
		if which == 'DOUBLE'
			bigRawStopPtn = r'(?:\\.?|[^"\n])*"'
		# narrow the tokenizer's token defs to a new shorter set
		# TODO: cache the tokens below
		t = List<of TokenDef>()
		t.add(TokenRegexDef('STRING_BIGRAW_STOP', bigRawStopPtn))
		t.add(TokenRegexDef('STRING_BIGRAW_TEXT', '.*\n'))
		.pushTokenDefs(t)
		_inBigRawString = true
		tok.value = tok.text[2:]
		tok.which = 'STRING_BIGRAW_TEXT'
		if tok.value == '+|\n'
			_isBigRawBounded = true
			tok.value = ''
		return tok

	def onSTRING_BIGRAW_STOP(tok as IToken) as IToken
		"""
		Last line of a multiline big raw string (text up to and including the closing quote).

		Drops the closing quote, applies the same indent / bar fix-up used for the other
		lines, resets all big raw state and pops the token definitions pushed by
		doSTRING_BIGRAW_START.
		"""
		assert _inBigRawString, tok
		text = tok.text[:-1]
		if _bigRawIndent == -1  # first line after leadin of bigRaw String
			_bigRawIndent = _calcBigRawIndent(text)
		tok.value = _bigRawFixup(text, _bigRawIndent, _isBigRawBounded)
		_inBigRawString = false
		_isBigRawBounded = false
		_bigRawIndent = -1
		.popTokenDefs
		return tok

	def onSTRING_BIGRAW_TEXT(tok as IToken) as IToken
		"""
		A full line (newline included) inside a multiline big raw string.
		The first such line decides the indent level that is stripped from every line.
		"""
		assert _inBigRawString, tok
		if _bigRawIndent == -1  # first full line after leadin of bigRaw String
			_bigRawIndent = _calcBigRawIndent(tok.text)
		tok.value = _bigRawFixup(tok.text, _bigRawIndent, _isBigRawBounded)
		return tok

	def onSTRING_BIGRAW_SINGLE(tok as IToken) as IToken
		"""
		A big raw string that opens and closes on one line, single-quoted.
		"""
		require tok.text.startsWith('R')
		return _bigRawLineToken(tok)

	def onSTRING_BIGRAW_DOUBLE(tok as IToken) as IToken
		"""
		A big raw string that opens and closes on one line, double-quoted.
		"""
		require tok.text.startsWith('R')
		return _bigRawLineToken(tok)

	def _bigRawLineToken(tok as IToken) as IToken
		"""
		Shared by the single-line big raw handlers: the value is the token text without
		the leading R, the opening quote and the closing quote.
		"""
		tok.value = tok.text[2:-1]
		tok.which = 'STRING_BIGRAW_LINE'
		return tok

	def _calcBigRawIndent(firstLine as String) as int
		"""
		Return the indent level to strip from the lines of a big raw string.

		`firstLine` is the first line after the one that opened the string.
		Returns the current indent level + 1 when that line starts with at least that
		much whitespace (a space counts 1 column, a tab counts 4), otherwise 0.
		A result of 0 turns indent stripping off for the whole string.
		"""
		curlvl = (_indentCount + 1) * 4
		for i in 0 : firstLine.length
			c = firstLine[i]
			if c == ' '
				curlvl -= 1
			else if c == '\t'
				curlvl -= 4
			else
				# less whitespace on the first line than the assumed indent level:
				# no whitespace pruning at all
				return 0
			if curlvl <= 0
				break
		return _indentCount + 1  # attempt LHS whitespace pruning to this level

	# much code commonality with method above but purpose different
	def _bigRawFixup(s as String, indentLevel as int, maybeBounded as bool) as String
		"""
		Return one line of a big raw string with its layout whitespace removed.

		When `maybeBounded` is true: whitespace before a leading '|' and the bar itself
		are removed, and a trailing '|' plus any whitespace after it are removed (a
		trailing newline is kept). A line with no leading and no trailing bar, including
		a blank or empty line, is returned untouched.

		Otherwise up to `indentLevel` levels of leading whitespace are removed (space = 1
		column, tab = 4), stopping early at the first non-whitespace character.
		An `indentLevel` of 0 or less returns `s` unchanged.
		"""
		if maybeBounded
			nl = '\n'  # Environment.newLine
			hasEOL = s.endsWith('\n')
			probe = s.trim
			# startsWith rather than probe[0]: probe is empty for a blank line or for a
			# closing quote that sits alone at the start of a line
			if probe.startsWith('|')
				s = s.trimStart(nil) to !
				s = s[1:]
			if probe.endsWith('|')
				s = s.trimEnd(nil) to !
				s = s[:-1] + if(hasEOL, nl, '')
			return s

		# This does the assumed indent level indentation removal
		if indentLevel <= 0
			return s
		remaining = indentLevel * 4
		pos = 0  # number of leading characters to drop
		while pos < s.length and remaining > 0
			c = s[pos]
			if c == ' '
				remaining -= 1
			else if c == '\t'
				remaining -= 4
			else
				break
			pos += 1
		return s[pos:]

	def onSTRING_SINGLE(tok as IToken) as IToken
		tok.value = .tokValueForString(tok.text)
		return tok
Source/CobraParser.cobra
# Patched regions of Source/CobraParser.cobra. Everything below lives inside the
# parser class. A line reading "# ..." marks unchanged source that the patch view skips.

			return StringLit(.grab)
		else if peek=='STRING_DOUBLE'
			return StringLit(.grab)
		else if peek == 'STRING_BIGRAW_TEXT' or peek == 'STRING_BIGRAW_LINE'
			return .stringBigRaw
		else if peek=='INTEGER_LIT'
			return IntegerLit(.grab)
		else if peek=='DECIMAL_LIT'

# ...

				.throwError('Expecting more string contents or the end of string after the bracketed expression.')
		return StringSubstLit(items)

	def stringBigRaw as StringLit
		"""
		Parse a big raw string literal.

		A STRING_BIGRAW_LINE token is a complete literal on its own. A STRING_BIGRAW_TEXT
		token starts a multiline literal: the following STRING_BIGRAW_TEXT tokens and the
		final STRING_BIGRAW_STOP token are folded into the first token (both text and
		value), and the literal is built from that merged token.
		Any other token inside the literal is reported through .throwError.
		"""
		first = .expect('STRING_BIGRAW_LINE', 'STRING_BIGRAW_TEXT')
		if first.which <> 'STRING_BIGRAW_LINE'
			values = StringBuilder(first.value to String)
			texts = StringBuilder(first.text)
			done = false
			while not done
				piece = .grab
				if piece.which == 'STRING_BIGRAW_TEXT' or piece.which == 'STRING_BIGRAW_STOP'
					values.append(piece.value)
					texts.append(piece.text)
					# the STOP piece carries the closing quote and ends the literal
					done = piece.which == 'STRING_BIGRAW_STOP'
				else
					if _verbosity>=4
						print '<> stringBigRaw, value=[values.toString]'
					.throwError('Expecting either another bigRaw string line or end of bigRaw string but got "[piece.which]".')
			first.text = texts.toString
			first.value = values.toString
		return StringLit(first)

	def typeExpr as TypeExpr
		return TypeExpr(.typeId)
Tests/100-basics/064-2-string-bigraw-bounded.cobra
1 class BigRawTst2 2 """ 3 Test of bigRaw string handling - '|' bounding. 4 5 If the first 3 chars of a bigRaw string are '+|\n' then those 3 chars are removed. 6 Any indentation before a leading '|', the leading '|' itself and any trailing '|' are removed 7 from each line of the string. 8 This allows visual formatting of display text in code. 9 Lines without a leading or trailing '|' have no adjustments done to them at all. 10 """ 11 def main is shared 12 # alignment bars 13 a = R'+| 14 | hello |' 15 assert a == ' hello ' 16 17 a = R'+| 18 | hello2' 19 assert a == ' hello2' 20 21 a = R'+| 22 hello3 |' 23 assert a == '\t\thello3 ' 24 25 a = R'+| 26 | |' 27 assert a == ' ' 28 29 a = R'+| 30 ||' 31 assert a == '' 32 33 a = R'+| 34 |||' 35 assert a == '|' 36 37 a = R'+| 38 | hello | 39 ||' 40 assert a == ' hello \n' 41 42 # trailing spaces after '|' removed 43 a = R'+| 44 | hello | 45 || ' 46 assert a == ' hello \n' 47 48 a = R'+| 49 | 0 | 50 | say | 51 | kan | 52 | U | 53 | C |' 54 assert a == ' 0 \n say \n kan \n U \n C ' 55 56 # No indent removal if no bars 57 b = R'+| 58 xxx 59 yyy' 60 assert b == '\t\t\txxx\n\t\t\tyyy' 61 62 c = R'+| 63 |align left | 64 | centered text | 65 | align right|' 66 assert c == 'align left \n centered text \n align right' 67 68 69 # " delimited 70 71 a = R"+| 72 | hello | 73 |.|" 74 assert a == ' hello \n.' 75 76 b = R"+| 77 xxx 78 yyy" 79 assert b == '\t\t\txxx\n\t\t\tyyy' 80 81 a = R"+| 82 hello3 | " 83 assert a == '\t\thello3 ' 84 85 a = R"+| 86 |||" 87 assert a == '|' 88 89 a = R" 90 ||| 91 " 92 assert a == '\n|||\n' 93 -
Tests/100-basics/064-2-string-bigraw.cobra
1 class BigRaw 2 """ 3 Test of bigRaw string handling 4 5 A BigRaw string is a multiline 'raw' string: no substitutions ([expr]) 6 and no escaped char (\n) processing. 7 All other standard control chars are left in the string as is. (WYDSIWYG) 8 9 Convenience: 10 If the first line of the bigRaw string after the one starting the bigRaw string 11 ( R'.....\n) starts one indent level in from the line containing the R'....\n then 12 indentation to that level on all lines with sufficient indentation is removed. 13 This allows alignment of lines for display viewability without losing code 14 formatting, 15 i.e. 16 x=R'x 17 y 18 z' 19 is the same string as 20 x=R'x 21 y 22 z' 23 which is the same as 'x\ny\nz' 24 25 If the first 3 chars of bigRaw string are '+|\n' then those 3 chars are removed and 26 on subsequent lines any whitespace before a leading '|' and the leading '|' 27 and any trailing '|' and whitespace are removed from each line of the bigRaw string. 28 Lines without leading or trailing '|' have no adjustments done to them at all. 29 This allows visual formatting of display text in code with the bars acting as 30 a visual representation of the edges of the displayed text. 31 e.g. 
32 x = R'+| 33 |align left | 34 | centered text | 35 | align right|' 36 37 assert x == 'align left \n centered text \n align right' 38 """ 39 40 def main is shared 41 42 .testRaw # same tests as (little) raw strings 43 .testEscapes 44 .testIndentRemoval 45 .testNoIndentRemoval 46 .testMisc 47 .testDouble 48 49 def testRaw is shared 50 s as String = '' 51 52 s = R'aoeu' 53 assert s.length==4 54 assert s[3]==c'u' 55 56 s = R'\'' 57 assert s.length==2 58 assert s[0]==c'\\' 59 assert s[1]==c"'" 60 61 s = R'\\' # lexing fail w/o trailing ' but r'//' OK 62 assert s.length==2 63 assert s[0]==c'\\' 64 assert s[1]==c'\\' 65 66 s = R'[' 67 assert s.length==1 68 assert s[0]==c'[' 69 70 s = R'[]' 71 assert s.length==2 72 assert s[0]==c'[' 73 assert s[1]==c']' 74 75 assert R'\"' == r"\"" 76 assert R'cobra\language' == "cobra\\language" 77 assert R'[]' == r"[]" 78 assert R'[]' == r'[]' 79 80 def testEscapesOld is shared 81 # escaped single quote becomes quote 82 assert R'ab\n\'c' == r"ab\n'c" 83 assert R'xx\'' == "xx'" 84 assert R'\'xx' == "'xx" 85 86 assert R'\n' == "\\n" 87 assert R'hello\' 88 noddy' == "hello\\'\nnoddy" 89 90 def testEscapes is shared 91 # escaped single quote stays as written: the backslash is kept 92 assert R'ab\n\'c' == r"ab\n\'c" 93 assert R'xx\'' == "xx\\'" 94 assert R'\'xx' == "\\'xx" 95 96 assert R'\n' == "\\n" 97 assert R'hello\' 98 noddy' == "hello\\'\nnoddy" 99 100 assert R'hello\' 101 noddy' == "hello\\'\nnoddy" 102 103 104 def testIndentRemoval is shared 105 a = R'aaa 106 bbb 107 cc' 108 assert a == 'aaa\nbbb\ncc' 109 110 a = R'aaa\n 111 bbb 112 ..cc' 113 assert a == 'aaa\\n\nbbb\n..cc' 114 115 assert R'hello 116 noddy' == "hello\nnoddy" 117 118 # first line one indent level in, remove indentation 119 b = R'{ 120 "profile_color":"9ae4e8", 121 "url":null, 122 "statuses_count":1, 123 "protected":false}' 124 assert b.startsWith('{\n"profile_color"') 125 lines = b.splitLines 126 assert lines[0] == '{' 127 assert lines[1] =='"profile_color":"9ae4e8",' 128 assert lines[2] 
=='"url":null,' 129 assert lines[3] == '"statuses_count":1,' 130 assert lines[4] == '"protected":false}' 131 132 # first line > one indent level in, remove indentation up to current 133 b = R'{ 134 "color":"9ae4e8", 135 "status_count":1, 136 "prot":false}' 137 assert b.startsWith('{\n\t"color"') 138 lines = b.splitLines 139 assert lines[0] == '{' 140 assert lines[1] =='\t"color":"9ae4e8",' 141 assert lines[2] == '\t"status_count":1,' 142 assert lines[3] == '\t"prot":false}' 143 144 assert R' 145 ' == '\n' 146 147 148 def testNoIndentRemoval is shared 149 # indent removal suppressed 150 assert R'hello 151 no noddy' == 'hello\n\t\tno noddy' 152 153 # same with spaces not tabs 154 assert R'hello 155 no noddy' == 'hello\n no noddy' 156 157 # first line not at one indent level in, leaves indentation as displayed 158 c = R'{ 159 "profile_color":"9ae4e8", 160 "description":null, 161 "utc_offset":null, 162 "text_color":"000000", 163 "followers_count":0, 164 "following":null}' 165 assert c.startsWith('{\n\t\t"profile_color"') 166 lines = c.splitLines 167 assert lines[2] == '\t\t"description":null,' 168 assert lines[3] == '\t\t"utc_offset":null,' 169 assert lines[4] == '\t\t"text_color":"000000",' 170 assert lines[5] == '\t\t"followers_count":0,' 171 assert lines[6] == '\t\t"following":null}' 172 173 174 # mixed spaces and tabs, no indentlevel trim 175 assert R' 176 xyzzy 177 foo 178 bar 179 bat: 180 cat 181 mat ' == '\n\txyzzy\n foo\n bar\n\tbat:\n\t\tcat\n\t mat ' 182 183 def testMisc is shared 184 # Check that auto indent removal at the default indent gives the same string as flush-left text 185 assert R'x 186 y 187 z' == R'x 188 y 189 z' 190 191 def testDouble is shared 192 # selected tests above with "" delimiters 193 s = R"\"" 194 assert s == r'\"' 195 196 s = R"\\" # weird - fail lexing w/o another " trailing 197 assert s == r'\\' 198 199 s = R"\\ 200 " 201 assert s == '\\\\\n' 202 203 s = R"[" 204 assert s.length==1 205 assert s[0]==c'[' 206 207 s = R"[]" 208 assert s == r'[]' 209 210 assert 
R"\n\"\c" == r'\n\"\c' 211 212 a = R"aaa 213 bbb 214 cc" 215 assert a == "aaa\nbbb\ncc" 216 217 assert R"hello 218 no noddy" == 'hello\n\t\tno noddy' 219 -
Developer/IntermediateReleaseNotes.text
48 48 49 49 * Sets with duplicate members such as `{1, 0, 1}` now generate a warning. 50 50 51 * Added big raw string support (multiline raw strings). Ticket 142 52 e.g. 53 s = R'line1 54 line2' 55 assert s == 'line1\nline2' 56 51 57 ================================================================================ 52 58 Library 53 59 ================================================================================