perf(library/init/lean/parser/term): index term parsers by leading token

66% speedup on core.lean
This commit is contained in:
Sebastian Ullrich 2018-10-15 10:21:08 +02:00
parent 2df885d9a3
commit a820b9955f
5 changed files with 83 additions and 38 deletions

View file

@ -96,12 +96,17 @@ do -- build and register parser
| _ := throw "register_notation_parser: unimplemented",
pure $ psym::ptrans.to_monad
),
first_rule::_ ← pure spec.rules | throw "register_notation_parser: unreachable",
first_tk ← match first_rule.symbol with
| notation_symbol.view.quoted {symbol := some a ..} :=
pure a.val.trim
| _ := throw "register_notation_parser: unreachable",
let ps := ps.bind id,
cfg ← match spec.prefix_arg with
| none := pure {cfg with leading_term_parsers :=
parser.combinators.node k ps::cfg.leading_term_parsers}
cfg.leading_term_parsers.insert first_tk $ parser.combinators.node k ps}
| some _ := pure {cfg with trailing_term_parsers :=
parser.combinators.node k (read::ps.map coe)::cfg.trailing_term_parsers},
cfg.trailing_term_parsers.insert first_tk $ parser.combinators.node k (read::ps.map coe)},
pure cfg
/-- Recreate `elaborator_state.parser_cfg` from the elaborator state and the initial config,

View file

@ -191,11 +191,29 @@ abbreviation trailing_term_parser := trailing_term_parser_m syntax
instance trailing_term_parser_coe : has_coe term_parser trailing_term_parser :=
⟨λ x _, x⟩
local attribute [instance] name.has_lt_quick
/-- A multimap indexed by tokens. Used for indexing parsers by their leading token. -/
def token_map (α : Type) := rbmap name (list α) (<)
def token_map.insert {α : Type} (map : token_map α) (k : name) (v : α) : token_map α :=
match map.find k with
| none := map.insert k [v]
| some vs := map.insert k (v::vs)
def token_map.of_list {α : Type} : list (name × α) → token_map α
| [] := mk_rbmap _ _ _
| (⟨k,v⟩::xs) := (token_map.of_list xs).insert k v
instance token_map_nil.tokens : parser.has_tokens $ @token_map.of_list ρ [] :=
default _
instance token_map_cons.tokens (k : name) (r : ρ) (rs : list (name × ρ)) [parser.has_tokens r] [parser.has_tokens $ token_map.of_list rs] :
parser.has_tokens $ token_map.of_list ((k,r)::rs) :=
⟨tokens r ++ tokens (token_map.of_list rs)⟩
-- This needs to be a separate structure since `term_parser`s cannot contain themselves in their config
-- TODO: we need indexing here.
structure command_parser_config extends parser_config :=
(leading_term_parsers : list term_parser)
(trailing_term_parsers : list trailing_term_parser)
(leading_term_parsers : token_map term_parser)
(trailing_term_parsers : token_map trailing_term_parser)
instance command_parser_config_coe_parser_config : has_coe command_parser_config parser_config :=
⟨command_parser_config.to_parser_config⟩

View file

@ -18,9 +18,7 @@ local notation `m` := rec_t nat syntax base_m
local notation `parser` := m syntax
def curr_lbp : m nat :=
do st ← get,
-- suppress error messages
except.ok tk ← (monad_lift $ observing $ lookahead token) <* put st | pure 0 <* put st,
do except.ok tk ← monad_lift peek_token | pure 0,
match tk with
| syntax.atom ⟨_, sym⟩ := do
cfg ← read,

View file

@ -53,7 +53,11 @@ node! hole [hole: symbol "_" max_prec]
@[derive parser.has_tokens parser.has_view]
def sort.parser : term_parser :=
node_choice! sort {"Sort":max_prec, "Type":max_prec}
node! sort ["Sort":max_prec]
@[derive parser.has_tokens parser.has_view]
def type.parser : term_parser :=
node! type ["Type":max_prec]
section binder
@[derive has_tokens has_view]
@ -289,27 +293,33 @@ node! anonymous_inaccessible ["._":max_prec]
-- TODO(Sebastian): replace with attribute
@[derive has_tokens]
def builtin_leading_parsers : list term_parser := [
ident_univs.parser,
number.parser,
string_lit.parser,
paren.parser,
hole.parser,
sort.parser,
lambda.parser,
pi.parser,
anonymous_constructor.parser,
explicit_ident.parser,
let.parser,
have.parser,
show.parser,
assume.parser,
match.parser,
if.parser,
struct_inst.parser,
subtype.parser,
inaccessible.parser,
anonymous_inaccessible.parser
def builtin_leading_parsers : token_map term_parser := token_map.of_list [
(ident.name, ident_univs.parser),
(number.name, number.parser),
(string_lit.name, string_lit.parser),
("(", paren.parser),
("_", hole.parser),
("Sort", sort.parser),
("Type", type.parser),
("λ", lambda.parser),
("fun", lambda.parser),
("Π", pi.parser),
("Pi", pi.parser),
("∀", pi.parser),
("forall", pi.parser),
("⟨", anonymous_constructor.parser),
("@", explicit_ident.parser),
("@@", explicit_ident.parser),
("let", let.parser),
("have", have.parser),
("show", show.parser),
("assume", assume.parser),
("match", match.parser),
("if", if.parser),
("{", struct_inst.parser),
("{", subtype.parser),
(".(", inaccessible.parser),
("._", anonymous_inaccessible.parser)
]
@[derive parser.has_tokens parser.has_view]
@ -327,8 +337,6 @@ node! arrow [dom: get_leading, op: unicode_symbol "→" "->" 25, range: term.par
@[derive parser.has_view]
def projection.parser : trailing_term_parser :=
/- Use max_prec + 1 so that it bind more tightly than application:
`a (b).c` should be parsed as `a ((b).c)`. -/
try $ node! projection [
term: get_leading,
-- do not consume trailing whitespace
@ -341,25 +349,28 @@ try $ node! projection [
-- register '.' manually because of `raw_str`
instance projection.tokens : has_tokens projection.parser :=
⟨[{«prefix» := ".", lbp := max_prec}]⟩
/- Use max_prec + 1 so that it bind more tightly than application:
`a (b).c` should be parsed as `a ((b).c)`. -/
⟨[{«prefix» := ".", lbp := max_prec.succ}]⟩
@[derive has_tokens]
def builtin_trailing_parsers : list trailing_term_parser := [
arrow.parser,
projection.parser
def builtin_trailing_parsers : token_map trailing_term_parser := token_map.of_list [
("→", arrow.parser),
("->", arrow.parser),
(".", projection.parser)
]
end term
def term_parser.run (p : term_parser) : command_parser :=
do cfg ← read,
let trailing : trailing_term_parser := (longest_match cfg.trailing_term_parsers) <|>
let trailing : trailing_term_parser := (indexed cfg.trailing_term_parsers >>= longest_match) <|>
-- The application parsers should only be tried as a fall-back;
-- e.g. `a + b` should not be parsed as `a (+ b)`.
--TODO(Sebastian): We should be able to remove this workaround using
-- the proposed more robust precedence handling
any_of [term.sort_app.parser, term.app.parser],
adapt_reader coe $ pratt_parser (longest_match cfg.leading_term_parsers) trailing p
adapt_reader coe $ pratt_parser (indexed cfg.leading_term_parsers >>= longest_match) trailing p
end parser
end lean

View file

@ -185,6 +185,10 @@ do it ← left_over,
pure tk
)
def peek_token : basic_parser_m (except (parsec.message syntax) syntax) :=
do st ← get,
observing (try (lookahead token)) <* put st
variable [monad_basic_parser m]
def symbol (sym : string) (lbp := 0) : parser :=
@ -303,5 +307,14 @@ lift $ any_of [symbol unicode lbp, symbol ascii lbp]
-- use unicode variant by default
instance unicode_symbol.view_default (u a lbp) : parser.has_view_default (unicode_symbol u a lbp : parser) _ (syntax.atom ⟨none, u⟩) := ⟨⟩
def indexed {α : Type} (map : token_map α) : m (list α) :=
lift $ do
except.ok tk ← peek_token | error "",
n ← match tk with
| syntax.atom ⟨_, s⟩ := pure $ mk_simple_name s
| syntax.node ⟨some k, _⟩ := pure k.name
| _ := error "",
option.to_monad $ map.find n
end «parser»
end lean