fix(library/init/lean/parser/reader/basic): store token start position for perfect roundtripping

This commit is contained in:
Sebastian Ullrich 2018-07-30 14:40:23 -07:00
parent 312de57aec
commit 242d63af25
4 changed files with 92 additions and 68 deletions

View file

@ -61,9 +61,11 @@ structure reader_state :=
(tokens : list token_config)
-- note: stored in reverse for efficient append
(errors : list lean.message)
def reader_state.empty : reader_state :=
⟨[], []⟩
/- Start position of the current token. This might not be equal to the parser
position for two reasons:
* We plan to eagerly parse leading whitespace so as not to do so multiple times
* During error recovery, skipped input should be associated to the next token -/
(token_start : string.iterator)
structure reader_config := mk
@ -84,17 +86,14 @@ instance : monad_except (parsec.message syntax) read_m := infer_instance
protected def run (cfg : reader_config) (st : reader_state) (s : string) (r : read_m syntax) :
syntax × list message :=
match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_left_over s with
| except.ok ((a, st), it) :=
let errors :=
if it.remaining = 0 then st.errors
else to_string { parsec.message . expected := dlist.singleton "end of input", it := it, custom := () } :: st.errors in
(a, errors.reverse)
| except.error msg := (msg.custom, [to_string msg])
end read_m
match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_eoi s with
| except.ok (a, st) := (a, st.errors.reverse)
| except.error msg := (msg.custom, [to_string msg])
def log_error (e : message) : read_m unit :=
modify (λ st, {st with errors := to_string e :: st.errors})
end read_m
namespace reader
open monad_parsec
@ -102,7 +101,18 @@ protected def parse (cfg : reader_config) (s : string) (r : reader) :
syntax × list message :=
-- the only hardcoded tokens, because they are never directly mentioned by a `reader`
let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in
r.read.run cfg ⟨r.tokens ++ tokens, []⟩ s
do {
stx ← catch r.read $ λ (msg : parsec.message _), read_m.log_error (to_string msg) *> pure msg.custom,
whitespace,
-- add `eoi` node and store any residual input in its prefix
catch eoi $ λ msg, read_m.log_error (to_string msg),
tk_start ← reader_state.token_start <$> get,
let stop := tk_start.to_end in
pure $ syntax.node ⟨name.anonymous, [
stx,
syntax.node ⟨`eoi, [syntax.atom ⟨some ⟨⟨tk_start, stop⟩, stop.offset, ⟨stop, stop⟩⟩, atomic_val.string ""⟩]⟩
]⟩
}.run cfg ⟨r.tokens ++ tokens, [], s.mk_iterator⟩ s
namespace combinators
def node' (m : name) (rs : list reader) : reader :=

View file

@ -118,14 +118,19 @@ private def commands_aux : bool → list syntax → nat → read_m syntax
-- unknown command: try to skip token, or else single character
when (¬ recovering) $ do {
it ← left_over,
log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () }
read_m.log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () }
},
catch (token *> pure ()) (λ _, any *> pure ()),
tk_start ← reader_state.token_start <$> get,
-- since the output of the following parser is never captured in a syntax tree...
try (token *> pure ()) <|> (any *> pure ()),
-- ...restore `token_start` after it
modify $ λ st, {st with token_start := tk_start},
pure (tt, none)
}) $ λ msg, do {
-- error inside command: advance input to error position, log error, return partial syntax tree
set_iterator msg.it,
modify (λ st, {st with errors := to_string msg :: st.errors}),
modify $ λ st, {st with token_start := msg.it},
read_m.log_error (to_string msg),
pure (tt, some msg.custom)
},
commands_aux recovering (c.to_monad++cs) n

View file

@ -62,13 +62,16 @@ do start ← left_over,
pure ⟨start, stop⟩
def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) :=
do leading ← whitespace,
p ← pos,
do token_start ← reader_state.token_start <$> get,
whitespace,
it ← left_over,
a ← r,
-- TODO(Sebastian): less greedy, more natural whitespace assignement
-- E.g. only read up to the next line break
trailing ← whitespace,
pure (a, ⟨leading, p, trailing⟩)
it2 ← left_over,
modify $ λ st, {st with token_start := it2},
pure (a, ⟨⟨token_start, it⟩, it.offset, trailing⟩)
/-- Match a string literally without consulting the token table. -/
def raw_symbol (sym : string) : reader :=

View file

@ -1,77 +1,83 @@
result:
(module [(prelude "prelude")] [] [])
[(module [(prelude "prelude")] [] []) (eoi "")]
result:
(module [] [(import "import" [(import_path [] me)])] [])
[(module [] [(import "import" [(import_path [] me)])] []) (eoi "")]
error at line 1, column 0:
expected command
partial syntax tree:
(module [] [] [])
[(module [] [] []) (eoi "")]
result:
(module
[(prelude "prelude")]
[(import "import" [(import_path ["." "."] a) (import_path [] b)])
(import "import" [(import_path [] c)])]
[])
[(module
[(prelude "prelude")]
[(import "import" [(import_path ["." "."] a) (import_path [] b)])
(import "import" [(import_path [] c)])]
[])
(eoi "")]
result:
(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])])
[(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])]) (eoi "")]
result:
(module
[]
[]
[(open
"open"
[[me
[["as" you]]
[[["(" a] [b c] ")"]]
[[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]]
[["(" "hiding" [a b] ")"]]]])])
[(module
[]
[]
[(open
"open"
[[me
[["as" you]]
[[["(" a] [b c] ")"]]
[[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]]
[["(" "hiding" [a b] ")"]]]])])
(eoi "")]
error at line 1, column 8:
expected command
partial syntax tree:
(module [] [] [(open "open" [[me [] [] [] []]])])
[(module [] [] [(open "open" [[me [] [] [] []]])]) (eoi "")]
error at line 1, column 5:
expected identifier
error at line 1, column 9:
expected identifier
partial syntax tree:
(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])])
[(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])]) (eoi "")]
error at line 1, column 8:
expected command
partial syntax tree:
(module
[]
[]
[(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])])
[(module
[]
[]
[(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])])
(eoi "")]
result:
(module
[]
[]
[(open "open" [[a [] [] [] []]])
(section
"section"
[b]
[(open "open" [[c [] [] [] []]])
(section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])]
"end"
[b])])
[(module
[]
[]
[(open "open" [[a [] [] [] []]])
(section
"section"
[b]
[(open "open" [[c [] [] [] []]])
(section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])]
"end"
[b])])
(eoi "")]
result:
(module [] [] [(section "section" [a] [] "end" [])])
[(module [] [] [(section "section" [a] [] "end" [])]) (eoi "")]
result:
(module
[]
[]
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))])
[(module
[]
[]
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))])
(eoi "")]
error at line 10, column 19:
expected "_"
error at line 11, column 26:
expected "_"
partial syntax tree:
(module
[(prelude "prelude")]
[]
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole []))
(notation
"notation"
[[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]]
":="
(hole []))])
[(module
[(prelude "prelude")]
[]
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole []))
(notation
"notation"
[[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]]
":="
(hole []))])
(eoi "")]