perf(library/init/lean/parser): cache consecutive calls to token at the same position

Parser performance improved by about 33%
This commit is contained in:
Sebastian Ullrich 2018-09-27 15:27:00 -07:00
parent e535cd92f7
commit 2e5ea16e2f
4 changed files with 60 additions and 22 deletions

View file

@ -37,19 +37,41 @@ structure token_config :=
by the `token` parser. -/
(suffix_parser : option (parsec' (source_info → syntax)) := none)
-- Backtrackable state
structure parser_state :=
(messages : message_log)
structure token_cache_entry :=
(start_it stop_it : string.iterator)
(tk : syntax)
-- Non-backtrackable state
structure parser_cache :=
(token_cache : option token_cache_entry := none)
structure parser_config :=
(tokens : trie token_config)
(filename : string)
@[derive monad alternative monad_state monad_parsec monad_except]
def parser_core_t (m : Type → Type) [monad m] :=
state_t parser_state $ parsec_t syntax $ state_t parser_cache $ m
@[derive monad alternative monad_reader monad_state monad_parsec monad_except]
def parser_t (ρ : Type) (m : Type → Type) [monad m] := reader_t ρ $ state_t parser_state $ parsec_t syntax m
def parser_t (ρ : Type) (m : Type → Type) [monad m] := reader_t ρ $ parser_core_t m
abbreviation basic_parser_m := parser_t parser_config id
abbreviation basic_parser := basic_parser_m syntax
abbreviation monad_basic_parser := has_monad_lift_t basic_parser_m
section
local attribute [reducible] parser_t parser_core_t
def get_cache : basic_parser_m parser_cache :=
monad_lift (get : state_t parser_cache id _)
def put_cache : parser_cache → basic_parser_m punit :=
λ c, monad_lift (put c : state_t parser_cache id _)
end
-- an arbitrary `parser` type; parsers are usually some monad stack based on `basic_parser_m` returning `syntax`
variable {ρ : Type}
@ -91,16 +113,13 @@ def message_of_parsec_message {μ : Type} (cfg : parser_config) (msg : parsec.me
-- FIXME: translate position
{filename := cfg.filename, pos := ⟨0, 0⟩, text := to_string msg}
section
local attribute [reducible] parser_t
/-- Run parser stack, returning a partial syntax tree in case of a fatal error -/
protected def run {m : Type → Type} {α ρ : Type} [monad m] [has_coe_t ρ parser_config] (cfg : ρ) (st : parser_state) (s : string) (r : parser_t ρ m α) :
protected def run {m : Type → Type} {α ρ : Type} [monad m] [has_coe_t ρ parser_config] (cfg : ρ) (s : string) (r : parser_t ρ m α) :
m (sum α syntax × message_log) :=
do r ← ((r.run cfg).run st).parse_with_eoi s,
do (r, _) ← (((r.run cfg).run {messages:=message_log.empty}).parse s).run {},
pure $ match r with
| except.ok (a, st) := (sum.inl a, st.messages)
| except.error msg := (sum.inr msg.custom, message_log.empty.add (message_of_parsec_message cfg msg))
end
open coroutine
open monad_parsec
@ -145,7 +164,7 @@ def parse.view : syntax → option parse.view_ty
on a recursive call to `command.parser`, i.e. it forgets about locally registered parsers,
but that's not an issue for our intended uses of it. -/
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec]
def command_parser_m (ρ : Type) := reader_t ρ $ rec_t unit syntax $ state_t parser_state $ parsec_t syntax id
def command_parser_m (ρ : Type) := reader_t ρ $ rec_t unit syntax $ parser_core_t id
section
local attribute [reducible] parser_t command_parser_m

View file

@ -30,17 +30,20 @@ structure module_parser_output :=
(messages : message_log)
(cfg : module_parser_config)
section
local attribute [reducible] parser_core_t
/- NOTE: missing the `reader_t` from `parser_t` because the `coroutine` already provides
`monad_reader module_parser_config`. -/
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_coroutine]
def module_parser_m := state_t parser_state $ parsec_t syntax $ coroutine module_parser_config module_parser_output
def module_parser_m := parser_core_t $ coroutine module_parser_config module_parser_output
abbreviation module_parser := module_parser_m syntax
end
instance module_parser_m.lift_parser_t (ρ : Type) [has_lift_t module_parser_config ρ] :
has_monad_lift (parser_t ρ id) module_parser_m :=
{ monad_lift := λ α x st it, do
{ monad_lift := λ α x st it nb_st, do
cfg ← read,
pure (((x.run ↑cfg).run st) it) }
pure ((((x.run ↑cfg).run st) it).run nb_st) }
namespace module
def yield_command (cmd : syntax) : module_parser_m unit :=

View file

@ -143,14 +143,30 @@ do stx ← (node_choice! number {
} : basic_parser),
pure $ λ info, update_trailing info.trailing stx
def token : basic_parser_m syntax :=
do (r, i) ← with_source_info $ do {
-- NOTE the order: if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
f::_ ← longest_match [symbol', ident'] <|> list.ret <$> number' | error "token: unreachable",
pure f
},
pure (r i)
def token : basic_parser :=
do it ← left_over,
cache ← get_cache,
-- NOTE: using `catch` instead of `<|>` so that error messages from the second block are preferred
catch (do
-- check token cache
some tkc ← pure cache.token_cache | failure,
guard (it.offset = tkc.start_it.offset),
-- hackishly update parsec position
monad_parsec.lift (λ it, parsec.result.ok () tkc.stop_it none),
pure tkc.tk
) (λ _, do
-- cache failed, update cache
(r, i) ← with_source_info $ do {
-- NOTE the order: if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
f::_ ← longest_match [symbol', ident'] <|> list.ret <$> number' | error "token: unreachable",
pure f
},
let tk := r i,
new_it ← left_over,
put_cache {cache with token_cache := some ⟨it, new_it, tk⟩},
pure tk
)
variable [monad_basic_parser m]

View file

@ -31,7 +31,7 @@ do t ← parser.mk_token_trie $
parser.tokens term.builtin_leading_parsers ++
parser.tokens term.builtin_trailing_parsers,
pure $ {
filename := "<unknown", tokens := t,
filename := "<unknown>", tokens := t,
command_parsers := command.builtin_command_parsers,
leading_term_parsers := term.builtin_leading_parsers,
trailing_term_parsers := term.builtin_trailing_parsers,
@ -39,8 +39,8 @@ do t ← parser.mk_token_trie $
def parse_module (s : string) : except string (list module_parser_output) :=
do cfg ← mk_config,
(outputs, except.ok ((), ⟨⟨[]⟩⟩)) ← pure $ coroutine.finish (λ out : module_parser_output, out.cfg)
((module.parser.run ⟨message_log.empty⟩).parse s) cfg
(outputs, sum.inl (), ⟨[]⟩) ← pure $ coroutine.finish (λ out : module_parser_output, out.cfg)
(parser.run cfg s (λ _, module.parser)) cfg
| except.error "final parser output should be empty!",
pure outputs
@ -90,7 +90,7 @@ universes u v
s ← io.fs.read_file "../../library/init/core.lean",
let s := (s.mk_iterator.nextn 6500).prev_to_string,
cfg ← monad_except.lift_except $ mk_config,
let k := (module.parser.run ⟨message_log.empty⟩).parse s,
let k := parser.run cfg s (λ _, module.parser),
outs ← io.prim.iterate_eio (k, cfg, ([] : list module_parser_output)) $ λ ⟨k, cfg, outs⟩, match k.resume cfg with
| coroutine_result_core.done p := pure (sum.inr outs.reverse)
| coroutine_result_core.yielded out k := do {