From 6c606f1c11c9c33b32181adfe93adb03ae3384f2 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 31 Mar 2025 15:07:25 -0400 Subject: [PATCH 1/5] Fix identifier lexing of := --- src/ocaml/preprocess/lexer_ident.mll | 2 +- tests/test-dirs/locate/issue1915.t | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/ocaml/preprocess/lexer_ident.mll b/src/ocaml/preprocess/lexer_ident.mll index e9690dbd27..bfbe8adcfe 100644 --- a/src/ocaml/preprocess/lexer_ident.mll +++ b/src/ocaml/preprocess/lexer_ident.mll @@ -94,6 +94,7 @@ rule token = parse | "(" { LPAREN } | ")" { RPAREN } | "." { DOT } + | ":=" { COLONEQUAL } | "!" symbolchar + { PREFIXOP(Lexing.lexeme lexbuf) } | ['~' '?'] symbolchar + @@ -147,7 +148,6 @@ rule token = parse | ".." | ":" | "::" - | ":=" | ":>" | ";" | ";;" diff --git a/tests/test-dirs/locate/issue1915.t b/tests/test-dirs/locate/issue1915.t index f5481290fa..cf36ac9d10 100644 --- a/tests/test-dirs/locate/issue1915.t +++ b/tests/test-dirs/locate/issue1915.t @@ -7,12 +7,18 @@ Testing the behavior of custom operators > EOF $ $MERLIN single locate -look-for ml -position 2:17 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 1, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 3:12 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 1, + "col": 4 + } Testing custom indexing operators From 5fa38576b9fabde92618a0f54ff1ea55c653b193 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 31 Mar 2025 15:56:28 -0400 Subject: [PATCH 2/5] Heuristic for custom indexing --- src/kernel/mreader_lexer.ml | 11 ++++++ src/ocaml/preprocess/lexer_ident.mll | 2 ++ tests/test-dirs/locate/issue1915.t | 52 +++++++++++++++++++++------- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/kernel/mreader_lexer.ml b/src/kernel/mreader_lexer.ml index 28d77d2599..b346d42b97 100644 --- a/src/kernel/mreader_lexer.ml +++ b/src/kernel/mreader_lexer.ml @@ -123,6 +123,12 @@ let comments t = open Parser_raw +let pair_bracket = function + | '{' -> Some '}' + | '(' -> Some ')' + | '[' -> Some ']' + | _ -> None + let is_operator = function | PREFIXOP s | LETOP s @@ -148,6 +154,11 @@ let is_operator = function | AMPERAMPER -> Some "&&" | COLONEQUAL -> Some ":=" | PLUSEQ -> Some "+=" + | DOTOP s -> ( + let last = String.get s (String.length s - 1) in + match pair_bracket last with + | Some pair -> Some (s ^ String.make 1 pair) + | None -> None) | _ -> None (* [reconstruct_identifier] is impossible to read at the moment, here is a diff --git a/src/ocaml/preprocess/lexer_ident.mll b/src/ocaml/preprocess/lexer_ident.mll index bfbe8adcfe..495bc65d4e 100644 --- a/src/ocaml/preprocess/lexer_ident.mll +++ b/src/ocaml/preprocess/lexer_ident.mll @@ -93,6 +93,8 @@ rule token = parse | "'" { QUOTE } | "(" { LPAREN } | ")" { RPAREN } + | "." dotsymbolchar+ ['(' '{' '[' ] + { DOTOP(Lexing.lexeme lexbuf) } | "." { DOT } | ":=" { COLONEQUAL } | "!" symbolchar + diff --git a/tests/test-dirs/locate/issue1915.t b/tests/test-dirs/locate/issue1915.t index cf36ac9d10..cdeb2004ca 100644 --- a/tests/test-dirs/locate/issue1915.t +++ b/tests/test-dirs/locate/issue1915.t @@ -29,36 +29,62 @@ Testing custom indexing operators > let () = name.%{2;4} > let () = name.%{5} > let () = ( .%{ } ) name 3 + > let () = ( .%{;..} ) name 7 > EOF $ $MERLIN single locate -look-for ml -position 4:15 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not in environment '%'" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 4:16 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 5:15 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not in environment '%'" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 5:15 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not in environment '%'" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 5:16 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 6:13 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not in environment '%'" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 6:14 \ + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } + + $ $MERLIN single locate -look-for ml -position 6:15 \ > -filename ./main.ml < ./main.ml | jq '.value' "Not a valid identifier" - $ $MERLIN single locate -look-for ml -position 6:15 \ + $ $MERLIN single locate -look-for ml -position 7:15 \ > -filename ./main.ml < ./main.ml | jq '.value' "Not a valid identifier" From 4e24f0a392acd2db8ba05dccb35de954a9661a4d Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 31 Mar 2025 16:06:12 -0400 Subject: [PATCH 3/5] Exact parsing of reified custom indexing in reconstruct_identifier_from_tokens --- src/kernel/mreader_lexer.ml | 88 +++++++++++++++++-- src/ocaml/preprocess/lexer_ident.mll | 8 +- .../locate/context-detection/cd-test.t/run.t | 8 +- tests/test-dirs/locate/issue1915.t | 36 +++++--- 4 files changed, 117 insertions(+), 23 deletions(-) diff --git a/src/kernel/mreader_lexer.ml b/src/kernel/mreader_lexer.ml index b346d42b97..e43e5f91fc 100644 --- a/src/kernel/mreader_lexer.ml +++ b/src/kernel/mreader_lexer.ml @@ -124,9 +124,9 @@ let comments t = open Parser_raw let pair_bracket = function - | '{' -> Some '}' - | '(' -> Some ')' - | '[' -> Some ']' + | '{' -> Some RBRACE + | '(' -> Some RPAREN + | '[' -> Some RBRACKET | _ -> None let is_operator = function @@ -157,8 +157,17 @@ let is_operator = function | DOTOP s -> ( let last = String.get s (String.length s - 1) in match pair_bracket last with - | Some pair -> Some (s ^ String.make 1 pair) - | None -> None) + | Some pair -> + (* note: this is a heuristic which ignores the difference between + the following three operators: + [.%( )] + [.%(;..)] + [.%(;..)<-] + It will always return the first one. Now, typically, if one + is defined, all are, with the same semantics, but this is + still unfortunate. *) + Some (s ^ Parser_printer.print_token pair) + | None -> Some s) | _ -> None (* [reconstruct_identifier] is impossible to read at the moment, here is a @@ -244,6 +253,75 @@ let reconstruct_identifier_from_tokens tokens pos = (* LIDENT always begin a new identifier *) | ((LIDENT _, _, _) as item) :: items -> if acc = [] then look_for_dot [ item ] items else check acc (item :: items) + (* Reified custom indexing *) + (* e.g. [( .%(;..) )] *) + | (RPAREN, _, _) + :: (token, _, tend) + :: (DOTDOT, _, _) + :: (SEMI, _, _) + :: (DOTOP s, tstart, _) + :: (LPAREN, _, _) + :: items + when acc = [] -> ( + let last = String.get s (String.length s - 1) in + match pair_bracket last with + | Some pair when pair = token -> + let item = + (DOTOP (s ^ ";.." ^ Parser_printer.print_token pair), tstart, tend) + in + look_for_dot [ item ] items + | _ -> check acc items + (* e.g. [( .%(;..)<- )] *)) + | (RPAREN, _, _) + :: (LESSMINUS, _, tend) + :: (token, _, _) + :: (DOTDOT, _, _) + :: (SEMI, _, _) + :: (DOTOP s, tstart, _) + :: (LPAREN, _, _) + :: items + when acc = [] -> ( + let last = String.get s (String.length s - 1) in + match pair_bracket last with + | Some pair when pair = token -> + let item = + ( DOTOP (s ^ ";.." ^ Parser_printer.print_token pair ^ "<-"), + tstart, + tend ) + in + look_for_dot [ item ] items + | _ -> check acc items + (* e.g. [( .%( ) )] *)) + | (RPAREN, _, _) + :: (token, _, tend) + :: (DOTOP s, tstart, _) + :: (LPAREN, _, _) + :: items + when acc = [] -> ( + let last = String.get s (String.length s - 1) in + match pair_bracket last with + | Some pair when pair = token -> + let item = + (DOTOP (s ^ Parser_printer.print_token pair), tstart, tend) + in + look_for_dot [ item ] items + | _ -> check acc items + (* e.g. [( .%( )<- )] *)) + | (RPAREN, _, _) + :: (LESSMINUS, _, tend) + :: (token, _, _) + :: (DOTOP s, tstart, _) + :: (LPAREN, _, _) + :: items + when acc = [] -> ( + let last = String.get s (String.length s - 1) in + match pair_bracket last with + | Some pair when pair = token -> + let item = + (DOTOP (s ^ Parser_printer.print_token pair ^ "<-"), tstart, tend) + in + look_for_dot [ item ] items + | _ -> check acc items) (* Reified operators behave like LIDENT *) | (RPAREN, _, _) :: ((token, _, _) as item) :: (LPAREN, _, _) :: items when is_operator token <> None && acc = [] -> look_for_dot [ item ] items diff --git a/src/ocaml/preprocess/lexer_ident.mll b/src/ocaml/preprocess/lexer_ident.mll index 495bc65d4e..d10f3791fa 100644 --- a/src/ocaml/preprocess/lexer_ident.mll +++ b/src/ocaml/preprocess/lexer_ident.mll @@ -93,6 +93,11 @@ rule token = parse | "'" { QUOTE } | "(" { LPAREN } | ")" { RPAREN } + | "}" { RBRACE } + | "]" { RBRACKET } + | ".." { DOTDOT } + | "<-" { LESSMINUS } + | ";" { SEMI } | "." dotsymbolchar+ ['(' '{' '[' ] { DOTOP(Lexing.lexeme lexbuf) } | "." { DOT } @@ -147,11 +152,9 @@ rule token = parse | "*" | "," | "->" - | ".." | ":" | "::" | ":>" - | ";" | ";;" | "<" | "<-" @@ -176,7 +179,6 @@ rule token = parse | "[@@" | "[@@@" | "!" - | "!=" | "+" | "+." diff --git a/tests/test-dirs/locate/context-detection/cd-test.t/run.t b/tests/test-dirs/locate/context-detection/cd-test.t/run.t index 867e37d011..c2640c5806 100644 --- a/tests/test-dirs/locate/context-detection/cd-test.t/run.t +++ b/tests/test-dirs/locate/context-detection/cd-test.t/run.t @@ -79,7 +79,13 @@ FIXME we failed to parse/reconstruct the ident, that's interesting $ $MERLIN single locate -look-for ml -position 16:16 -filename ./test.ml < ./test.ml { "class": "return", - "value": "Not a valid identifier", + "value": { + "file": "$TESTCASE_ROOT/test.ml", + "pos": { + "line": 13, + "col": 11 + } + }, "notifications": [] } diff --git a/tests/test-dirs/locate/issue1915.t b/tests/test-dirs/locate/issue1915.t index cdeb2004ca..4e50781bef 100644 --- a/tests/test-dirs/locate/issue1915.t +++ b/tests/test-dirs/locate/issue1915.t @@ -28,10 +28,11 @@ Testing custom indexing operators > let name = "baz" > let () = name.%{2;4} > let () = name.%{5} - > let () = ( .%{ } ) name 3 > let () = ( .%{;..} ) name 7 + > let () = ( .%{ } ) name 3 > EOF +Should be on line 1 $ $MERLIN single locate -look-for ml -position 4:15 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { @@ -46,45 +47,52 @@ Testing custom indexing operators "col": 4 } - $ $MERLIN single locate -look-for ml -position 5:15 \ + $ $MERLIN single locate -look-for ml -position 6:13 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { - "line": 2, + "line": 1, "col": 4 } - $ $MERLIN single locate -look-for ml -position 5:15 \ + $ $MERLIN single locate -look-for ml -position 6:14 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { - "line": 2, + "line": 1, "col": 4 } - $ $MERLIN single locate -look-for ml -position 5:16 \ + $ $MERLIN single locate -look-for ml -position 6:15 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { - "line": 2, + "line": 1, "col": 4 } - $ $MERLIN single locate -look-for ml -position 6:13 \ +Should be on line 2 + $ $MERLIN single locate -look-for ml -position 5:15 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { "line": 2, "col": 4 } - $ $MERLIN single locate -look-for ml -position 6:14 \ + $ $MERLIN single locate -look-for ml -position 5:15 \ > -filename ./main.ml < ./main.ml | jq '.value.pos' { "line": 2, "col": 4 } - $ $MERLIN single locate -look-for ml -position 6:15 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + $ $MERLIN single locate -look-for ml -position 5:16 \ + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } $ $MERLIN single locate -look-for ml -position 7:15 \ - > -filename ./main.ml < ./main.ml | jq '.value' - "Not a valid identifier" + > -filename ./main.ml < ./main.ml | jq '.value.pos' + { + "line": 2, + "col": 4 + } From 009906faa1d6b70cfede58a5966076d1fb1e0f03 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Mon, 31 Mar 2025 16:52:51 -0400 Subject: [PATCH 4/5] Improve comments --- src/kernel/mreader_lexer.ml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/kernel/mreader_lexer.ml b/src/kernel/mreader_lexer.ml index e43e5f91fc..44571a2aac 100644 --- a/src/kernel/mreader_lexer.ml +++ b/src/kernel/mreader_lexer.ml @@ -253,7 +253,7 @@ let reconstruct_identifier_from_tokens tokens pos = (* LIDENT always begin a new identifier *) | ((LIDENT _, _, _) as item) :: items -> if acc = [] then look_for_dot [ item ] items else check acc (item :: items) - (* Reified custom indexing *) + (* Reified custom indexing operators *) (* e.g. [( .%(;..) )] *) | (RPAREN, _, _) :: (token, _, tend) @@ -270,8 +270,8 @@ let reconstruct_identifier_from_tokens tokens pos = (DOTOP (s ^ ";.." ^ Parser_printer.print_token pair), tstart, tend) in look_for_dot [ item ] items - | _ -> check acc items - (* e.g. [( .%(;..)<- )] *)) + | _ -> check acc items) + (* e.g. [( .%(;..)<- )] *) | (RPAREN, _, _) :: (LESSMINUS, _, tend) :: (token, _, _) @@ -290,8 +290,8 @@ let reconstruct_identifier_from_tokens tokens pos = tend ) in look_for_dot [ item ] items - | _ -> check acc items - (* e.g. [( .%( ) )] *)) + | _ -> check acc items) + (* e.g. [( .%( ) )] *) | (RPAREN, _, _) :: (token, _, tend) :: (DOTOP s, tstart, _) @@ -305,8 +305,8 @@ let reconstruct_identifier_from_tokens tokens pos = (DOTOP (s ^ Parser_printer.print_token pair), tstart, tend) in look_for_dot [ item ] items - | _ -> check acc items - (* e.g. [( .%( )<- )] *)) + | _ -> check acc items) + (* e.g. [( .%( )<- )] *) | (RPAREN, _, _) :: (LESSMINUS, _, tend) :: (token, _, _) From 9f3168c8e54fc8175b89194bd5d6e6d95ffeab5e Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Wed, 9 Jul 2025 15:28:46 -0400 Subject: [PATCH 5/5] Remove FIXME for resolved test --- tests/test-dirs/locate/context-detection/cd-test.t/run.t | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test-dirs/locate/context-detection/cd-test.t/run.t b/tests/test-dirs/locate/context-detection/cd-test.t/run.t index c2640c5806..e5fb445e8a 100644 --- a/tests/test-dirs/locate/context-detection/cd-test.t/run.t +++ b/tests/test-dirs/locate/context-detection/cd-test.t/run.t @@ -74,8 +74,6 @@ This should say "Already at definition point" (we're defining the label): "notifications": [] } -FIXME we failed to parse/reconstruct the ident, that's interesting - $ $MERLIN single locate -look-for ml -position 16:16 -filename ./test.ml < ./test.ml { "class": "return",