diff --git a/library/init/lean/parser/reader/basic.lean b/library/init/lean/parser/reader/basic.lean index b2ec3356a8..97c4d96505 100644 --- a/library/init/lean/parser/reader/basic.lean +++ b/library/init/lean/parser/reader/basic.lean @@ -61,9 +61,11 @@ structure reader_state := (tokens : list token_config) -- note: stored in reverse for efficient append (errors : list lean.message) - -def reader_state.empty : reader_state := -⟨[], []⟩ +/- Start position of the current token. This might not be equal to the parser + position for two reasons: + * We plan to eagerly parse leading whitespace so as not to do so multiple times + * During error recovery, skipped input should be associated to the next token -/ +(token_start : string.iterator) structure reader_config := mk @@ -84,17 +86,14 @@ instance : monad_except (parsec.message syntax) read_m := infer_instance protected def run (cfg : reader_config) (st : reader_state) (s : string) (r : read_m syntax) : syntax × list message := -match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_left_over s with -| except.ok ((a, st), it) := - let errors := - if it.remaining = 0 then st.errors - else to_string { parsec.message . expected := dlist.singleton "end of input", it := it, custom := () } :: st.errors in - (a, errors.reverse) -| except.error msg := (msg.custom, [to_string msg]) -end read_m +match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_eoi s with +| except.ok (a, st) := (a, st.errors.reverse) +| except.error msg := (msg.custom, [to_string msg]) def log_error (e : message) : read_m unit := modify (λ st, {st with errors := to_string e :: st.errors}) +end read_m + namespace reader open monad_parsec @@ -102,7 +101,18 @@ protected def parse (cfg : reader_config) (s : string) (r : reader) : syntax × list message := -- the only hardcoded tokens, because they are never directly mentioned by a `reader` let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in -r.read.run cfg ⟨r.tokens ++ tokens, []⟩ s +do { + stx ← catch r.read $ λ (msg : parsec.message _), read_m.log_error (to_string msg) *> pure msg.custom, + whitespace, + -- add `eoi` node and store any residual input in its prefix + catch eoi $ λ msg, read_m.log_error (to_string msg), + tk_start ← reader_state.token_start <$> get, + let stop := tk_start.to_end in + pure $ syntax.node ⟨name.anonymous, [ + stx, + syntax.node ⟨`eoi, [syntax.atom ⟨some ⟨⟨tk_start, stop⟩, stop.offset, ⟨stop, stop⟩⟩, atomic_val.string ""⟩]⟩ + ]⟩ +}.run cfg ⟨r.tokens ++ tokens, [], s.mk_iterator⟩ s namespace combinators def node' (m : name) (rs : list reader) : reader := diff --git a/library/init/lean/parser/reader/module.lean b/library/init/lean/parser/reader/module.lean index eaade947a1..916a27938e 100644 --- a/library/init/lean/parser/reader/module.lean +++ b/library/init/lean/parser/reader/module.lean @@ -118,14 +118,19 @@ private def commands_aux : bool → list syntax → nat → read_m syntax -- unknown command: try to skip token, or else single character when (¬ recovering) $ do { it ← left_over, - log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () } + read_m.log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () } }, - catch (token *> pure ()) (λ _, any *> pure ()), + tk_start ← reader_state.token_start <$> get, + -- since the output of the following parser is never captured in a syntax tree... + try (token *> pure ()) <|> (any *> pure ()), + -- ...restore `token_start` after it + modify $ λ st, {st with token_start := tk_start}, pure (tt, none) }) $ λ msg, do { -- error inside command: advance input to error position, log error, return partial syntax tree set_iterator msg.it, - modify (λ st, {st with errors := to_string msg :: st.errors}), + modify $ λ st, {st with token_start := msg.it}, + read_m.log_error (to_string msg), pure (tt, some msg.custom) }, commands_aux recovering (c.to_monad++cs) n diff --git a/library/init/lean/parser/reader/token.lean b/library/init/lean/parser/reader/token.lean index 14a833eea6..3fbd7c22b1 100644 --- a/library/init/lean/parser/reader/token.lean +++ b/library/init/lean/parser/reader/token.lean @@ -62,13 +62,16 @@ do start ← left_over, pure ⟨start, stop⟩ def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) := -do leading ← whitespace, - p ← pos, +do token_start ← reader_state.token_start <$> get, + whitespace, + it ← left_over, a ← r, -- TODO(Sebastian): less greedy, more natural whitespace assignement -- E.g. only read up to the next line break trailing ← whitespace, - pure (a, ⟨leading, p, trailing⟩) + it2 ← left_over, + modify $ λ st, {st with token_start := it2}, + pure (a, ⟨⟨token_start, it⟩, it.offset, trailing⟩) /-- Match a string literally without consulting the token table. -/ def raw_symbol (sym : string) : reader := diff --git a/tests/lean/reader1.lean.expected.out b/tests/lean/reader1.lean.expected.out index d1d5b83e3a..e1425cfde6 100644 --- a/tests/lean/reader1.lean.expected.out +++ b/tests/lean/reader1.lean.expected.out @@ -1,77 +1,83 @@ result: -(module [(prelude "prelude")] [] []) +[(module [(prelude "prelude")] [] []) (eoi "")] result: -(module [] [(import "import" [(import_path [] me)])] []) +[(module [] [(import "import" [(import_path [] me)])] []) (eoi "")] error at line 1, column 0: expected command partial syntax tree: -(module [] [] []) +[(module [] [] []) (eoi "")] result: -(module - [(prelude "prelude")] - [(import "import" [(import_path ["." "."] a) (import_path [] b)]) - (import "import" [(import_path [] c)])] - []) +[(module + [(prelude "prelude")] + [(import "import" [(import_path ["." "."] a) (import_path [] b)]) + (import "import" [(import_path [] c)])] + []) + (eoi "")] result: -(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])]) +[(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])]) (eoi "")] result: -(module - [] - [] - [(open - "open" - [[me - [["as" you]] - [[["(" a] [b c] ")"]] - [[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]] - [["(" "hiding" [a b] ")"]]]])]) +[(module + [] + [] + [(open + "open" + [[me + [["as" you]] + [[["(" a] [b c] ")"]] + [[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]] + [["(" "hiding" [a b] ")"]]]])]) + (eoi "")] error at line 1, column 8: expected command partial syntax tree: -(module [] [] [(open "open" [[me [] [] [] []]])]) +[(module [] [] [(open "open" [[me [] [] [] []]])]) (eoi "")] error at line 1, column 5: expected identifier error at line 1, column 9: expected identifier partial syntax tree: -(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])]) +[(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])]) (eoi "")] error at line 1, column 8: expected command partial syntax tree: -(module - [] - [] - [(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])]) +[(module + [] + [] + [(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])]) + (eoi "")] result: -(module - [] - [] - [(open "open" [[a [] [] [] []]]) - (section - "section" - [b] - [(open "open" [[c [] [] [] []]]) - (section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])] - "end" - [b])]) +[(module + [] + [] + [(open "open" [[a [] [] [] []]]) + (section + "section" + [b] + [(open "open" [[c [] [] [] []]]) + (section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])] + "end" + [b])]) + (eoi "")] result: -(module [] [] [(section "section" [a] [] "end" [])]) +[(module [] [] [(section "section" [a] [] "end" [])]) (eoi "")] result: -(module - [] - [] - [(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))]) +[(module + [] + [] + [(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))]) + (eoi "")] error at line 10, column 19: expected "_" error at line 11, column 26: expected "_" partial syntax tree: -(module - [(prelude "prelude")] - [] - [(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole [])) - (notation - "notation" - [[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]] - ":=" - (hole []))]) +[(module + [(prelude "prelude")] + [] + [(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole [])) + (notation + "notation" + [[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]] + ":=" + (hole []))]) + (eoi "")]