fix(library/init/lean/parser/reader/basic): store token start position for perfect roundtripping
This commit is contained in:
parent
312de57aec
commit
242d63af25
4 changed files with 92 additions and 68 deletions
|
|
@ -61,9 +61,11 @@ structure reader_state :=
|
|||
(tokens : list token_config)
|
||||
-- note: stored in reverse for efficient append
|
||||
(errors : list lean.message)
|
||||
|
||||
def reader_state.empty : reader_state :=
|
||||
⟨[], []⟩
|
||||
/- Start position of the current token. This might not be equal to the parser
|
||||
position for two reasons:
|
||||
* We plan to eagerly parse leading whitespace so as not to do so multiple times
|
||||
* During error recovery, skipped input should be associated to the next token -/
|
||||
(token_start : string.iterator)
|
||||
|
||||
structure reader_config := mk
|
||||
|
||||
|
|
@ -84,17 +86,14 @@ instance : monad_except (parsec.message syntax) read_m := infer_instance
|
|||
|
||||
protected def run (cfg : reader_config) (st : reader_state) (s : string) (r : read_m syntax) :
|
||||
syntax × list message :=
|
||||
match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_left_over s with
|
||||
| except.ok ((a, st), it) :=
|
||||
let errors :=
|
||||
if it.remaining = 0 then st.errors
|
||||
else to_string { parsec.message . expected := dlist.singleton "end of input", it := it, custom := () } :: st.errors in
|
||||
(a, errors.reverse)
|
||||
| except.error msg := (msg.custom, [to_string msg])
|
||||
end read_m
|
||||
match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_eoi s with
|
||||
| except.ok (a, st) := (a, st.errors.reverse)
|
||||
| except.error msg := (msg.custom, [to_string msg])
|
||||
|
||||
def log_error (e : message) : read_m unit :=
|
||||
modify (λ st, {st with errors := to_string e :: st.errors})
|
||||
end read_m
|
||||
|
||||
namespace reader
|
||||
open monad_parsec
|
||||
|
||||
|
|
@ -102,7 +101,18 @@ protected def parse (cfg : reader_config) (s : string) (r : reader) :
|
|||
syntax × list message :=
|
||||
-- the only hardcoded tokens, because they are never directly mentioned by a `reader`
|
||||
let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in
|
||||
r.read.run cfg ⟨r.tokens ++ tokens, []⟩ s
|
||||
do {
|
||||
stx ← catch r.read $ λ (msg : parsec.message _), read_m.log_error (to_string msg) *> pure msg.custom,
|
||||
whitespace,
|
||||
-- add `eoi` node and store any residual input in its prefix
|
||||
catch eoi $ λ msg, read_m.log_error (to_string msg),
|
||||
tk_start ← reader_state.token_start <$> get,
|
||||
let stop := tk_start.to_end in
|
||||
pure $ syntax.node ⟨name.anonymous, [
|
||||
stx,
|
||||
syntax.node ⟨`eoi, [syntax.atom ⟨some ⟨⟨tk_start, stop⟩, stop.offset, ⟨stop, stop⟩⟩, atomic_val.string ""⟩]⟩
|
||||
]⟩
|
||||
}.run cfg ⟨r.tokens ++ tokens, [], s.mk_iterator⟩ s
|
||||
|
||||
namespace combinators
|
||||
def node' (m : name) (rs : list reader) : reader :=
|
||||
|
|
|
|||
|
|
@ -118,14 +118,19 @@ private def commands_aux : bool → list syntax → nat → read_m syntax
|
|||
-- unknown command: try to skip token, or else single character
|
||||
when (¬ recovering) $ do {
|
||||
it ← left_over,
|
||||
log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () }
|
||||
read_m.log_error $ to_string { parsec.message . expected := dlist.singleton "command", it := it, custom := () }
|
||||
},
|
||||
catch (token *> pure ()) (λ _, any *> pure ()),
|
||||
tk_start ← reader_state.token_start <$> get,
|
||||
-- since the output of the following parser is never captured in a syntax tree...
|
||||
try (token *> pure ()) <|> (any *> pure ()),
|
||||
-- ...restore `token_start` after it
|
||||
modify $ λ st, {st with token_start := tk_start},
|
||||
pure (tt, none)
|
||||
}) $ λ msg, do {
|
||||
-- error inside command: advance input to error position, log error, return partial syntax tree
|
||||
set_iterator msg.it,
|
||||
modify (λ st, {st with errors := to_string msg :: st.errors}),
|
||||
modify $ λ st, {st with token_start := msg.it},
|
||||
read_m.log_error (to_string msg),
|
||||
pure (tt, some msg.custom)
|
||||
},
|
||||
commands_aux recovering (c.to_monad++cs) n
|
||||
|
|
|
|||
|
|
@ -62,13 +62,16 @@ do start ← left_over,
|
|||
pure ⟨start, stop⟩
|
||||
|
||||
def with_source_info {α : Type} (r : read_m α) : read_m (α × source_info) :=
|
||||
do leading ← whitespace,
|
||||
p ← pos,
|
||||
do token_start ← reader_state.token_start <$> get,
|
||||
whitespace,
|
||||
it ← left_over,
|
||||
a ← r,
|
||||
-- TODO(Sebastian): less greedy, more natural whitespace assignement
|
||||
-- E.g. only read up to the next line break
|
||||
trailing ← whitespace,
|
||||
pure (a, ⟨leading, p, trailing⟩)
|
||||
it2 ← left_over,
|
||||
modify $ λ st, {st with token_start := it2},
|
||||
pure (a, ⟨⟨token_start, it⟩, it.offset, trailing⟩)
|
||||
|
||||
/-- Match a string literally without consulting the token table. -/
|
||||
def raw_symbol (sym : string) : reader :=
|
||||
|
|
|
|||
|
|
@ -1,77 +1,83 @@
|
|||
result:
|
||||
(module [(prelude "prelude")] [] [])
|
||||
[(module [(prelude "prelude")] [] []) (eoi "")]
|
||||
result:
|
||||
(module [] [(import "import" [(import_path [] me)])] [])
|
||||
[(module [] [(import "import" [(import_path [] me)])] []) (eoi "")]
|
||||
error at line 1, column 0:
|
||||
expected command
|
||||
partial syntax tree:
|
||||
(module [] [] [])
|
||||
[(module [] [] []) (eoi "")]
|
||||
result:
|
||||
(module
|
||||
[(prelude "prelude")]
|
||||
[(import "import" [(import_path ["." "."] a) (import_path [] b)])
|
||||
(import "import" [(import_path [] c)])]
|
||||
[])
|
||||
[(module
|
||||
[(prelude "prelude")]
|
||||
[(import "import" [(import_path ["." "."] a) (import_path [] b)])
|
||||
(import "import" [(import_path [] c)])]
|
||||
[])
|
||||
(eoi "")]
|
||||
result:
|
||||
(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])])
|
||||
[(module [] [] [(open "open" [[me [] [] [] []] [you [] [] [] []]])]) (eoi "")]
|
||||
result:
|
||||
(module
|
||||
[]
|
||||
[]
|
||||
[(open
|
||||
"open"
|
||||
[[me
|
||||
[["as" you]]
|
||||
[[["(" a] [b c] ")"]]
|
||||
[[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]]
|
||||
[["(" "hiding" [a b] ")"]]]])])
|
||||
[(module
|
||||
[]
|
||||
[]
|
||||
[(open
|
||||
"open"
|
||||
[[me
|
||||
[["as" you]]
|
||||
[[["(" a] [b c] ")"]]
|
||||
[[["(" "renaming"] [[a "->" b] [c "->" d]] ")"]]
|
||||
[["(" "hiding" [a b] ")"]]]])])
|
||||
(eoi "")]
|
||||
error at line 1, column 8:
|
||||
expected command
|
||||
partial syntax tree:
|
||||
(module [] [] [(open "open" [[me [] [] [] []]])])
|
||||
[(module [] [] [(open "open" [[me [] [] [] []]])]) (eoi "")]
|
||||
error at line 1, column 5:
|
||||
expected identifier
|
||||
error at line 1, column 9:
|
||||
expected identifier
|
||||
partial syntax tree:
|
||||
(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])])
|
||||
[(module [] [] [(open "open" [[[]]]) (open "open" [[[]]])]) (eoi "")]
|
||||
error at line 1, column 8:
|
||||
expected command
|
||||
partial syntax tree:
|
||||
(module
|
||||
[]
|
||||
[]
|
||||
[(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])])
|
||||
[(module
|
||||
[]
|
||||
[]
|
||||
[(open "open" [[me [] [] [] []]]) (open "open" [[you [] [] [] []]])])
|
||||
(eoi "")]
|
||||
result:
|
||||
(module
|
||||
[]
|
||||
[]
|
||||
[(open "open" [[a [] [] [] []]])
|
||||
(section
|
||||
"section"
|
||||
[b]
|
||||
[(open "open" [[c [] [] [] []]])
|
||||
(section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])]
|
||||
"end"
|
||||
[b])])
|
||||
[(module
|
||||
[]
|
||||
[]
|
||||
[(open "open" [[a [] [] [] []]])
|
||||
(section
|
||||
"section"
|
||||
[b]
|
||||
[(open "open" [[c [] [] [] []]])
|
||||
(section "section" [d] [(open "open" [[e [] [] [] []]])] "end" [d])]
|
||||
"end"
|
||||
[b])])
|
||||
(eoi "")]
|
||||
result:
|
||||
(module [] [] [(section "section" [a] [] "end" [])])
|
||||
[(module [] [] [(section "section" [a] [] "end" [])]) (eoi "")]
|
||||
result:
|
||||
(module
|
||||
[]
|
||||
[]
|
||||
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))])
|
||||
[(module
|
||||
[]
|
||||
[]
|
||||
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole "_"))])
|
||||
(eoi "")]
|
||||
error at line 10, column 19:
|
||||
expected "_"
|
||||
error at line 11, column 26:
|
||||
expected "_"
|
||||
partial syntax tree:
|
||||
(module
|
||||
[(prelude "prelude")]
|
||||
[]
|
||||
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole []))
|
||||
(notation
|
||||
"notation"
|
||||
[[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]]
|
||||
":="
|
||||
(hole []))])
|
||||
[(module
|
||||
[(prelude "prelude")]
|
||||
[]
|
||||
[(notation "notation" [[] [[["`" "Prop" "`" []] []]]] ":=" (hole []))
|
||||
(notation
|
||||
"notation"
|
||||
[[f] [[["`" "$ " "`" [[":" (base10_lit "1")]]] [[a [[":" (base10_lit "0")]]]]]]]
|
||||
":="
|
||||
(hole []))])
|
||||
(eoi "")]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue