Skip to content

Commit 43d405f

Browse files
authored
[spec/interpreter/test] Align definition of newline with Unicode recommendation (#1684)
1 parent b39baf7 commit 43d405f

File tree

5 files changed

+21
-16
lines changed

5 files changed

+21
-16
lines changed

document/core/text/lexical.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@ The allowed formatting characters correspond to a subset of the |ASCII|_ *format
8585
\production{white space} & \Tspace &::=&
8686
(\text{~~} ~|~ \Tformat ~|~ \Tcomment)^\ast \\
8787
\production{format} & \Tformat &::=&
88-
\unicode{09} ~|~ \unicode{0A} ~|~ \unicode{0D} \\
88+
\Tnewline ~|~ \unicode{09} \\
89+
\production{newline} & \Tnewline &::=&
90+
\unicode{0A} ~|~ \unicode{0D} ~|~ \unicode{0D}~\unicode{0A} \\
8991
\end{array}
9092
9193
The only relevance of white space is to separate :ref:`tokens <text-token>`. It is otherwise ignored.
@@ -107,13 +109,13 @@ Block comments can be nested.
107109
\production{comment} & \Tcomment &::=&
108110
\Tlinecomment ~|~ \Tblockcomment \\
109111
\production{line comment} & \Tlinecomment &::=&
110-
\Tcommentd~~\Tlinechar^\ast~~(\unicode{0A} ~|~ \T{eof}) \\
112+
\Tcommentd~~\Tlinechar^\ast~~(\Tnewline ~|~ \T{eof}) \\
111113
\production{line character} & \Tlinechar &::=&
112-
c{:}\Tchar & (\iff c \neq \unicode{0A}) \\
114+
c{:}\Tchar & (\iff c \neq \unicode{0A} \land c \neq \unicode{0D}) \\
113115
\production{block comment} & \Tblockcomment &::=&
114116
\Tcommentl~~\Tblockchar^\ast~~\Tcommentr \\
115117
\production{block character} & \Tblockchar &::=&
116-
c{:}\Tchar & (\iff c \neq \text{;} \wedge c \neq \text{(}) \\ &&|&
118+
c{:}\Tchar & (\iff c \neq \text{;} \land c \neq \text{(}) \\ &&|&
117119
\text{;} & (\iff~\mbox{the next character is not}~\text{)}) \\ &&|&
118120
\text{(} & (\iff~\mbox{the next character is not}~\text{;}) \\ &&|&
119121
\Tblockcomment \\

document/core/util/macros.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,7 @@
700700
.. |Tchar| mathdef:: \xref{text/lexical}{text-char}{\T{char}}
701701
.. |Tspace| mathdef:: \xref{text/lexical}{text-space}{\T{space}}
702702
.. |Tformat| mathdef:: \xref{text/lexical}{text-format}{\T{format}}
703+
.. |Tnewline| mathdef:: \xref{text/lexical}{text-newline}{\T{newline}}
703704

704705
.. |Ttoken| mathdef:: \xref{text/lexical}{text-token}{\T{token}}
705706
.. |Tkeyword| mathdef:: \xref{text/lexical}{text-keyword}{\T{keyword}}

interpreter/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ zip: $(ZIP)
3333

3434
# Building
3535

36-
.PHONY: $(NAME) $(JSLIB)
36+
.PHONY: $(NAME) $(JSLIB)
3737

3838
$(NAME):
3939
rm -f $@

interpreter/text/lexer.mll

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ let string s =
2727
while !i < String.length s - 1 do
2828
let c = if s.[!i] <> '\\' then s.[!i] else
2929
match (incr i; s.[!i]) with
30-
| 'n' -> '\n'
31-
| 'r' -> '\r'
32-
| 't' -> '\t'
30+
| 'n' -> '\x0a'
31+
| 'r' -> '\x0d'
32+
| 't' -> '\x09'
3333
| '\\' -> '\\'
3434
| '\'' -> '\''
3535
| '\"' -> '\"'
@@ -61,10 +61,12 @@ let letter = ['a'-'z''A'-'Z']
6161
let symbol =
6262
['+''-''*''/''\\''^''~''=''<''>''!''?''@''#''$''%''&''|'':''`''.''\'']
6363

64-
let space = [' ''\t''\n''\r']
64+
let ascii_newline = ['\x0a''\x0d']
65+
(* A newline is LF, CR, or the two-character sequence CR LF ("\x0d\x0a"); longest-match makes CR LF a single newline. *)
let newline = ascii_newline | "\x0d\x0a"
66+
let space = [' ''\x09''\x0a''\x0d']
6567
let control = ['\x00'-'\x1f'] # space
6668
let ascii = ['\x00'-'\x7f']
67-
let ascii_no_nl = ascii # '\x0a'
69+
let ascii_no_nl = ascii # ascii_newline
6870
let utf8cont = ['\x80'-'\xbf']
6971
let utf8enc =
7072
['\xc2'-'\xdf'] utf8cont
@@ -127,8 +129,8 @@ rule token = parse
127129
| float as s { FLOAT s }
128130

129131
| string as s { STRING (string s) }
130-
| '"'character*('\n'|eof) { error lexbuf "unclosed string literal" }
131-
| '"'character*['\x00'-'\x09''\x0b'-'\x1f''\x7f']
132+
| '"'character*(newline|eof) { error lexbuf "unclosed string literal" }
133+
| '"'character*(control#ascii_newline)
132134
{ error lexbuf "illegal control character in string literal" }
133135
| '"'character*'\\'_
134136
{ error_nest (Lexing.lexeme_end_p lexbuf) lexbuf "illegal escape" }
@@ -698,11 +700,11 @@ rule token = parse
698700
| id as s { VAR s }
699701

700702
| ";;"utf8_no_nl*eof { EOF }
701-
| ";;"utf8_no_nl*'\n' { Lexing.new_line lexbuf; token lexbuf }
703+
| ";;"utf8_no_nl*newline { Lexing.new_line lexbuf; token lexbuf }
702704
| ";;"utf8_no_nl* { token lexbuf (* causes error on following position *) }
703705
| "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; token lexbuf }
704-
| space#'\n' { token lexbuf }
705-
| '\n' { Lexing.new_line lexbuf; token lexbuf }
706+
| space#ascii_newline { token lexbuf }
707+
| newline { Lexing.new_line lexbuf; token lexbuf }
706708
| eof { EOF }
707709

708710
| reserved { unknown lexbuf }
@@ -713,7 +715,7 @@ rule token = parse
713715
and comment start = parse
714716
| ";)" { () }
715717
| "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; comment start lexbuf }
716-
| '\n' { Lexing.new_line lexbuf; comment start lexbuf }
718+
| newline { Lexing.new_line lexbuf; comment start lexbuf }
717719
| utf8_no_nl { comment start lexbuf }
718720
| eof { error_nest start lexbuf "unclosed comment" }
719721
| _ { error lexbuf "malformed UTF-8 encoding" }

test/core/comments.wast

544 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)