From 6f68a0d1ebff59bc9117759f287c3a544a1eefc9 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 19 Sep 2018 16:21:10 -0700 Subject: [PATCH] feat(library/init/lean/parser/basic): check for conflicting tokens --- library/init/io.lean | 2 +- library/init/lean/parser/basic.lean | 21 +++++++++++++++------ library/init/lean/parser/trie.lean | 9 +++++++++ tests/lean/parser1.lean | 17 ++++++++++------- 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/library/init/io.lean b/library/init/io.lean index cafdf1fa34..0a8e0624a4 100644 --- a/library/init/io.lean +++ b/library/init/io.lean @@ -46,7 +46,7 @@ open fs constant iterate {α β : Type} : α → (α → io (sum α β)) → io β -def iterate_eio {α β : Type} (a : α) (f : α → eio (sum α β)) : eio β := +def iterate_eio {ε α β : Type} (a : α) (f : α → except_t ε io (sum α β)) : except_t ε io β := iterate a $ λ r, do r ← (f r).run, match r with diff --git a/library/init/lean/parser/basic.lean b/library/init/lean/parser/basic.lean index 2d3a14704b..275cf16af4 100644 --- a/library/init/lean/parser/basic.lean +++ b/library/init/lean/parser/basic.lean @@ -35,7 +35,7 @@ structure token_config := It should return a syntax tree with a "hole" for the `source_info` surrounding the token, which will be supplied by the `token` parser. -/ -(token_parser : option (parsec' (source_info → syntax)) := none) +(suffix_parser : option (parsec' (source_info → syntax)) := none) structure parser_state := (tokens : trie token_config) @@ -108,12 +108,21 @@ do cfg ← read, def eoi : syntax_node_kind := ⟨`lean.parser.eoi⟩ -protected def parse [monad m] (cfg : parser_config) (s : string) (r : parser_t m syntax) [parser.has_tokens r] : +def mk_parser_state (tokens : list token_config) : except string parser_state := +do -- the only hardcoded tokens, because they are never directly mentioned by a `parser` + let builtin_tokens : list token_config := [⟨"/-", 0, none⟩, ⟨"--", 0, none⟩], + t ← (builtin_tokens ++ tokens).mfoldl (λ (t : trie token_config) tk, + match t.find tk.prefix with + | some tk' := if tk.lbp = tk'.lbp then pure t else throw $ + "invalid token '" ++ tk.prefix ++ "', has been defined with precendences " ++ + to_string tk.lbp ++ " and " ++ to_string tk'.lbp + | none := pure $ t.insert tk.prefix tk) + trie.mk, + pure ⟨t, message_log.empty⟩ + +protected def parse [monad m] (cfg : parser_config) (st : parser_state) (s : string) (r : parser_t m syntax) [parser.has_tokens r] : m (syntax × message_log) := --- the only hardcoded tokens, because they are never directly mentioned by a `parser` -let builtin_tokens : list token_config := [⟨"/-", 0, none⟩, ⟨"--", 0, none⟩] in -let trie := (tokens r ++ builtin_tokens).foldl (λ t cfg, trie.insert t cfg.prefix cfg) trie.mk in -parser.run cfg ⟨trie, message_log.empty⟩ s $ do +parser.run cfg st s $ do stx ← catch r $ λ (msg : parsec.message _), do { parser.log_message msg, pure msg.custom diff --git a/library/init/lean/parser/trie.lean b/library/init/lean/parser/trie.lean index 1de23aa370..b2034fcd98 100644 --- a/library/init/lean/parser/trie.lean +++ b/library/init/lean/parser/trie.lean @@ -25,6 +25,15 @@ private def insert_aux (val : α) : nat → trie α → string.iterator → trie def insert (t : trie α) (s : string) (val : α) : trie α := insert_aux val s.length t s.mk_iterator +private def find_aux : nat → trie α → string.iterator → option α +| 0 (trie.node val _) _ := val +| (n+1) (trie.node val map) it := do + t' ← rbmap_core.find (<) map it.curr, + find_aux n t' it.next + +def find (t : trie α) (s : string) : option α := +find_aux s.length t s.mk_iterator + private def match_prefix_aux : nat → trie α → string.iterator → option (string.iterator × α) → option (string.iterator × α) | 0 (trie.node val map) it acc := prod.mk it <$> val <|> acc | (n+1) (trie.node val map) it acc := diff --git a/tests/lean/parser1.lean b/tests/lean/parser1.lean index 7ca89c7ceb..63504f7383 100644 --- a/tests/lean/parser1.lean +++ b/tests/lean/parser1.lean @@ -3,7 +3,7 @@ open lean open lean.parser open lean.parser.syntax_node_kind.has_view -def show_result (p : syntax × message_log) (s : string) : eio unit := +def show_result (p : syntax × message_log) (s : string) : except_t string io unit := let (stx, msgs) := p in when (stx.reprint ≠ s) ( io.println "reprint fail:" *> @@ -18,11 +18,13 @@ match msgs.to_list with io.println "partial syntax tree:", io.println (to_string stx) -def parse_module (s : string) : syntax × message_log := -coroutine.finish (λ cmd, ()) (parser.parse ⟨""⟩ s module.parser) () +def parse_module (s : string) : except string (syntax × message_log) := +do st ← parser.mk_parser_state (parser.tokens module.parser), + pure $ coroutine.finish (λ cmd, ()) (parser.parse ⟨""⟩ st s module.parser) () -def show_parse (s : string) : eio unit := -show_result (parse_module s) s +def show_parse (s : string) : except_t string io unit := +do r ← monad_except.lift_except $ parse_module s, + show_result r s #eval show_parse "prelude" #eval show_parse "import me" @@ -57,7 +59,7 @@ universes u v -- expansion example #eval (do { - (stx, ⟨[]⟩) ← pure $ parse_module "prefix `+`:10 := _", + (stx, ⟨[]⟩) ← monad_except.lift_except $ parse_module "prefix `+`:10 := _", some {root := stx, ..} ← pure $ parser.parse.view stx, some {commands := [stx], ..} ← pure $ view module stx, some stx ← pure $ command.mixfix.expand stx | throw "expand fail", @@ -69,7 +71,8 @@ universes u v #eval do s ← io.fs.read_file "../../library/init/core.lean", let s := (s.mk_iterator.nextn 3500).prev_to_string, - let k := parser.parse ⟨"foo"⟩ s module.parser, + st ← monad_except.lift_except $ parser.mk_parser_state (tokens module.parser), + let k := parser.parse ⟨"foo"⟩ st s module.parser, io.prim.iterate_eio k $ λ k, match k.resume () with | coroutine_result_core.done p := show_result p s *> pure (sum.inr ()) | coroutine_result_core.yielded cmd k := do {