perf(library/init/lean/parser): cache consecutive calls to token at the same position
Parser performance improved by about 33%
This commit is contained in:
parent
e535cd92f7
commit
2e5ea16e2f
4 changed files with 60 additions and 22 deletions
|
|
@ -37,19 +37,41 @@ structure token_config :=
|
|||
by the `token` parser. -/
|
||||
(suffix_parser : option (parsec' (source_info → syntax)) := none)
|
||||
|
||||
-- Backtrackable state
|
||||
structure parser_state :=
|
||||
(messages : message_log)
|
||||
|
||||
structure token_cache_entry :=
|
||||
(start_it stop_it : string.iterator)
|
||||
(tk : syntax)
|
||||
|
||||
-- Non-backtrackable state
|
||||
structure parser_cache :=
|
||||
(token_cache : option token_cache_entry := none)
|
||||
|
||||
structure parser_config :=
|
||||
(tokens : trie token_config)
|
||||
(filename : string)
|
||||
|
||||
@[derive monad alternative monad_state monad_parsec monad_except]
|
||||
def parser_core_t (m : Type → Type) [monad m] :=
|
||||
state_t parser_state $ parsec_t syntax $ state_t parser_cache $ m
|
||||
|
||||
@[derive monad alternative monad_reader monad_state monad_parsec monad_except]
|
||||
def parser_t (ρ : Type) (m : Type → Type) [monad m] := reader_t ρ $ state_t parser_state $ parsec_t syntax m
|
||||
def parser_t (ρ : Type) (m : Type → Type) [monad m] := reader_t ρ $ parser_core_t m
|
||||
abbreviation basic_parser_m := parser_t parser_config id
|
||||
abbreviation basic_parser := basic_parser_m syntax
|
||||
abbreviation monad_basic_parser := has_monad_lift_t basic_parser_m
|
||||
|
||||
section
|
||||
local attribute [reducible] parser_t parser_core_t
|
||||
def get_cache : basic_parser_m parser_cache :=
|
||||
monad_lift (get : state_t parser_cache id _)
|
||||
|
||||
def put_cache : parser_cache → basic_parser_m punit :=
|
||||
λ c, monad_lift (put c : state_t parser_cache id _)
|
||||
end
|
||||
|
||||
-- an arbitrary `parser` type; parsers are usually some monad stack based on `basic_parser_m` returning `syntax`
|
||||
variable {ρ : Type}
|
||||
|
||||
|
|
@ -91,16 +113,13 @@ def message_of_parsec_message {μ : Type} (cfg : parser_config) (msg : parsec.me
|
|||
-- FIXME: translate position
|
||||
{filename := cfg.filename, pos := ⟨0, 0⟩, text := to_string msg}
|
||||
|
||||
section
|
||||
local attribute [reducible] parser_t
|
||||
/-- Run parser stack, returning a partial syntax tree in case of a fatal error -/
|
||||
protected def run {m : Type → Type} {α ρ : Type} [monad m] [has_coe_t ρ parser_config] (cfg : ρ) (st : parser_state) (s : string) (r : parser_t ρ m α) :
|
||||
protected def run {m : Type → Type} {α ρ : Type} [monad m] [has_coe_t ρ parser_config] (cfg : ρ) (s : string) (r : parser_t ρ m α) :
|
||||
m (sum α syntax × message_log) :=
|
||||
do r ← ((r.run cfg).run st).parse_with_eoi s,
|
||||
do (r, _) ← (((r.run cfg).run {messages:=message_log.empty}).parse s).run {},
|
||||
pure $ match r with
|
||||
| except.ok (a, st) := (sum.inl a, st.messages)
|
||||
| except.error msg := (sum.inr msg.custom, message_log.empty.add (message_of_parsec_message cfg msg))
|
||||
end
|
||||
|
||||
open coroutine
|
||||
open monad_parsec
|
||||
|
|
@ -145,7 +164,7 @@ def parse.view : syntax → option parse.view_ty
|
|||
on a recursive call to `command.parser`, i.e. it forgets about locally registered parsers,
|
||||
but that's not an issue for our intended uses of it. -/
|
||||
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec]
|
||||
def command_parser_m (ρ : Type) := reader_t ρ $ rec_t unit syntax $ state_t parser_state $ parsec_t syntax id
|
||||
def command_parser_m (ρ : Type) := reader_t ρ $ rec_t unit syntax $ parser_core_t id
|
||||
|
||||
section
|
||||
local attribute [reducible] parser_t command_parser_m
|
||||
|
|
|
|||
|
|
@ -30,17 +30,20 @@ structure module_parser_output :=
|
|||
(messages : message_log)
|
||||
(cfg : module_parser_config)
|
||||
|
||||
section
|
||||
local attribute [reducible] parser_core_t
|
||||
/- NOTE: missing the `reader_t` from `parser_t` because the `coroutine` already provides
|
||||
`monad_reader module_parser_config`. -/
|
||||
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_coroutine]
|
||||
def module_parser_m := state_t parser_state $ parsec_t syntax $ coroutine module_parser_config module_parser_output
|
||||
def module_parser_m := parser_core_t $ coroutine module_parser_config module_parser_output
|
||||
abbreviation module_parser := module_parser_m syntax
|
||||
end
|
||||
|
||||
instance module_parser_m.lift_parser_t (ρ : Type) [has_lift_t module_parser_config ρ] :
|
||||
has_monad_lift (parser_t ρ id) module_parser_m :=
|
||||
{ monad_lift := λ α x st it, do
|
||||
{ monad_lift := λ α x st it nb_st, do
|
||||
cfg ← read,
|
||||
pure (((x.run ↑cfg).run st) it) }
|
||||
pure ((((x.run ↑cfg).run st) it).run nb_st) }
|
||||
|
||||
namespace module
|
||||
def yield_command (cmd : syntax) : module_parser_m unit :=
|
||||
|
|
|
|||
|
|
@ -143,14 +143,30 @@ do stx ← (node_choice! number {
|
|||
} : basic_parser),
|
||||
pure $ λ info, update_trailing info.trailing stx
|
||||
|
||||
def token : basic_parser_m syntax :=
|
||||
do (r, i) ← with_source_info $ do {
|
||||
-- NOTE the order: if a token is both a symbol and a valid identifier (i.e. a keyword),
|
||||
-- we want it to be recognized as a symbol
|
||||
f::_ ← longest_match [symbol', ident'] <|> list.ret <$> number' | error "token: unreachable",
|
||||
pure f
|
||||
},
|
||||
pure (r i)
|
||||
def token : basic_parser :=
|
||||
do it ← left_over,
|
||||
cache ← get_cache,
|
||||
-- NOTE: using `catch` instead of `<|>` so that error messages from the second block are preferred
|
||||
catch (do
|
||||
-- check token cache
|
||||
some tkc ← pure cache.token_cache | failure,
|
||||
guard (it.offset = tkc.start_it.offset),
|
||||
-- hackishly update parsec position
|
||||
monad_parsec.lift (λ it, parsec.result.ok () tkc.stop_it none),
|
||||
pure tkc.tk
|
||||
) (λ _, do
|
||||
-- cache failed, update cache
|
||||
(r, i) ← with_source_info $ do {
|
||||
-- NOTE the order: if a token is both a symbol and a valid identifier (i.e. a keyword),
|
||||
-- we want it to be recognized as a symbol
|
||||
f::_ ← longest_match [symbol', ident'] <|> list.ret <$> number' | error "token: unreachable",
|
||||
pure f
|
||||
},
|
||||
let tk := r i,
|
||||
new_it ← left_over,
|
||||
put_cache {cache with token_cache := some ⟨it, new_it, tk⟩},
|
||||
pure tk
|
||||
)
|
||||
|
||||
variable [monad_basic_parser m]
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ do t ← parser.mk_token_trie $
|
|||
parser.tokens term.builtin_leading_parsers ++
|
||||
parser.tokens term.builtin_trailing_parsers,
|
||||
pure $ {
|
||||
filename := "<unknown", tokens := t,
|
||||
filename := "<unknown>", tokens := t,
|
||||
command_parsers := command.builtin_command_parsers,
|
||||
leading_term_parsers := term.builtin_leading_parsers,
|
||||
trailing_term_parsers := term.builtin_trailing_parsers,
|
||||
|
|
@ -39,8 +39,8 @@ do t ← parser.mk_token_trie $
|
|||
|
||||
def parse_module (s : string) : except string (list module_parser_output) :=
|
||||
do cfg ← mk_config,
|
||||
(outputs, except.ok ((), ⟨⟨[]⟩⟩)) ← pure $ coroutine.finish (λ out : module_parser_output, out.cfg)
|
||||
((module.parser.run ⟨message_log.empty⟩).parse s) cfg
|
||||
(outputs, sum.inl (), ⟨[]⟩) ← pure $ coroutine.finish (λ out : module_parser_output, out.cfg)
|
||||
(parser.run cfg s (λ _, module.parser)) cfg
|
||||
| except.error "final parser output should be empty!",
|
||||
pure outputs
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ universes u v
|
|||
s ← io.fs.read_file "../../library/init/core.lean",
|
||||
let s := (s.mk_iterator.nextn 6500).prev_to_string,
|
||||
cfg ← monad_except.lift_except $ mk_config,
|
||||
let k := (module.parser.run ⟨message_log.empty⟩).parse s,
|
||||
let k := parser.run cfg s (λ _, module.parser),
|
||||
outs ← io.prim.iterate_eio (k, cfg, ([] : list module_parser_output)) $ λ ⟨k, cfg, outs⟩, match k.resume cfg with
|
||||
| coroutine_result_core.done p := pure (sum.inr outs.reverse)
|
||||
| coroutine_result_core.yielded out k := do {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue