From 134d27dbec7075bf2d8114689a4f187dc91bd20e Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 25 Jul 2018 18:07:56 -0700 Subject: [PATCH] feat(library/init/lean/parser/parsec): `longest_match` should return all longest parses In the case of overlapping notations, we will return a choice node of all possible parses. --- library/init/lean/parser/parsec.lean | 14 ++++++++------ library/init/lean/parser/reader/token.lean | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/library/init/lean/parser/parsec.lean b/library/init/lean/parser/parsec.lean index 5e848d0a0c..5e67f430ee 100644 --- a/library/init/lean/parser/parsec.lean +++ b/library/init/lean/parser/parsec.lean @@ -479,21 +479,23 @@ error msg def unexpected_at (msg : string) (pos : position) : m α := error msg dlist.empty pos -/- Execute all parsers in `ps` and return the result of the longest parse if any, +/- Execute all parsers in `ps` and return the result of the longest parse(s) if any, or else the result of the furthest error. If there are two parses of equal length, the first parse wins. -/ -def longest_match [monad_except message m] (ps : list (m α)) : m α := +def longest_match [monad_except message m] (ps : list (m α)) : m (list α) := do it ← left_over, - r ← ps.mfoldl (λ (r : result α) p, + r ← ps.mfoldr (λ p r, catch (lookahead $ do a ← p, it ← left_over, pure $ match r with - | result.ok _ it' := if it'.offset ≥ it.offset then r else result.ok a it - | _ := result.ok a it) + | result.ok as it' := if it'.offset > it.offset then r + else if it.offset > it'.offset then result.ok [a] it + else result.ok (a::as) it + | _ := result.ok [a] it) (λ msg, pure $ match r with - | result.error msg' _ := if nat.lt msg.pos msg'.pos then r + | result.error msg' _ := if nat.lt msg.pos msg'.pos then r -- FIXME else if nat.lt msg'.pos msg.pos then result.error msg tt else result.error (merge msg msg') tt | _ := r)) diff --git a/library/init/lean/parser/reader/token.lean b/library/init/lean/parser/reader/token.lean index 84ac7c8374..1e3ee19929 100644 --- a/library/init/lean/parser/reader/token.lean +++ b/library/init/lean/parser/reader/token.lean @@ -98,10 +98,12 @@ do tk ← match_token, | none := error def token : read_m syntax := -do (r, i) ← with_source_info $ +do (r, i) ← with_source_info $ do { -- NOTE the order: if a token is both a symbol and a valid identifier (i.e. a keyword), -- we want it to be recognized as a symbol - longest_match [symbol', ident', number'], + f::_ ← longest_match [symbol', ident', number'] | failure, + pure f + }, pure (r i) --TODO(Sebastian): error messages