@@ -27,9 +27,9 @@ let string s =
27
27
while ! i < String. length s - 1 do
28
28
let c = if s.[! i] <> '\\' then s.[! i] else
29
29
match (incr i; s.[! i]) with
30
- | 'n' -> '\n '
31
- | 'r' -> '\r '
32
- | 't' -> '\t '
30
+ | 'n' -> '\x0a '
31
+ | 'r' -> '\x0d '
32
+ | 't' -> '\x09 '
33
33
| '\\' -> '\\'
34
34
| '\' ' -> '\' '
35
35
| '\"' -> '\"'
@@ -61,10 +61,12 @@ let letter = ['a'-'z''A'-'Z']
61
61
let symbol =
62
62
['+''-''*''/''\\''^''~''=''<''>''!''?''@''#''$''%''&''|'':''`''.''\' ']
63
63
64
- let space = [' ''\t''\n''\r' ]
64
+ let ascii_newline = ['\x0a''\x0d' ]
65
+ (* A newline is LF, CR, or the two-character sequence CR LF (0x0d 0x0a). Matching CR LF as a single lexeme keeps rules that call Lexing.new_line on `newline` from counting a Windows line ending as two lines. *)
+ let newline = ascii_newline | "\x0d\x0a"
66
+ let space = [' ''\x09''\x0a''\x0d' ]
65
67
let control = ['\x00' - '\x1f' ] # space
66
68
let ascii = ['\x00' - '\x7f' ]
67
- let ascii_no_nl = ascii # '\x0a'
69
+ let ascii_no_nl = ascii # ascii_newline
68
70
let utf8cont = ['\x80' - '\xbf' ]
69
71
let utf8enc =
70
72
['\xc2' - '\xdf' ] utf8cont
@@ -127,8 +129,8 @@ rule token = parse
127
129
| float as s { FLOAT s }
128
130
129
131
| string as s { STRING (string s) }
130
- | '"' character* ('\n' | eof) { error lexbuf " unclosed string literal" }
131
- | '"' character* [ '\x00' - '\x09''\x0b' - '\x1f''\x7f' ]
132
+ | '"' character* (newline | eof) { error lexbuf " unclosed string literal" }
133
+ | '"' character* (control#ascii_newline)
132
134
{ error lexbuf " illegal control character in string literal" }
133
135
| '"' character* '\\' _
134
136
{ error_nest (Lexing. lexeme_end_p lexbuf) lexbuf " illegal escape" }
@@ -698,11 +700,11 @@ rule token = parse
698
700
| id as s { VAR s }
699
701
700
702
| " ;;" utf8_no_nl* eof { EOF }
701
- | " ;;" utf8_no_nl* '\n' { Lexing. new_line lexbuf; token lexbuf }
703
+ | " ;;" utf8_no_nl* newline { Lexing. new_line lexbuf; token lexbuf }
702
704
| " ;;" utf8_no_nl* { token lexbuf (* causes error on following position *) }
703
705
| " (;" { comment (Lexing. lexeme_start_p lexbuf) lexbuf; token lexbuf }
704
- | space#'\n' { token lexbuf }
705
- | '\n' { Lexing. new_line lexbuf; token lexbuf }
706
+ | space#ascii_newline { token lexbuf }
707
+ | newline { Lexing. new_line lexbuf; token lexbuf }
706
708
| eof { EOF }
707
709
708
710
| reserved { unknown lexbuf }
@@ -713,7 +715,7 @@ rule token = parse
713
715
(* [comment start lexbuf] consumes the remainder of a block comment up to its closing delimiter. [start] is the position passed in by the caller (the opening delimiter's start position at the call sites visible here) and is used to report an unclosed comment at eof. A nested opening delimiter recurses with its own start position before resuming the enclosing comment; line breaks update the lexbuf's line tracking via Lexing.new_line; any other byte that is not matched by utf8_no_nl is reported as malformed UTF-8. *)
and comment start = parse
714
716
| " ;)" { () }
715
717
| " (;" { comment (Lexing. lexeme_start_p lexbuf) lexbuf; comment start lexbuf }
716
- | '\n' { Lexing. new_line lexbuf; comment start lexbuf }
718
+ | newline { Lexing. new_line lexbuf; comment start lexbuf }
717
719
| utf8_no_nl { comment start lexbuf }
718
720
| eof { error_nest start lexbuf " unclosed comment" }
719
721
| _ { error lexbuf " malformed UTF-8 encoding" }
0 commit comments