perf(library/init/lean/parser/term): index term parsers by leading token
66% speedup on core.lean
This commit is contained in:
parent
2df885d9a3
commit
a820b9955f
5 changed files with 83 additions and 38 deletions
|
|
@ -96,12 +96,17 @@ do -- build and register parser
|
|||
| _ := throw "register_notation_parser: unimplemented",
|
||||
pure $ psym::ptrans.to_monad
|
||||
),
|
||||
first_rule::_ ← pure spec.rules | throw "register_notation_parser: unreachable",
|
||||
first_tk ← match first_rule.symbol with
|
||||
| notation_symbol.view.quoted {symbol := some a ..} :=
|
||||
pure a.val.trim
|
||||
| _ := throw "register_notation_parser: unreachable",
|
||||
let ps := ps.bind id,
|
||||
cfg ← match spec.prefix_arg with
|
||||
| none := pure {cfg with leading_term_parsers :=
|
||||
parser.combinators.node k ps::cfg.leading_term_parsers}
|
||||
cfg.leading_term_parsers.insert first_tk $ parser.combinators.node k ps}
|
||||
| some _ := pure {cfg with trailing_term_parsers :=
|
||||
parser.combinators.node k (read::ps.map coe)::cfg.trailing_term_parsers},
|
||||
cfg.trailing_term_parsers.insert first_tk $ parser.combinators.node k (read::ps.map coe)},
|
||||
pure cfg
|
||||
|
||||
/-- Recreate `elaborator_state.parser_cfg` from the elaborator state and the initial config,
|
||||
|
|
|
|||
|
|
@ -191,11 +191,29 @@ abbreviation trailing_term_parser := trailing_term_parser_m syntax
|
|||
instance trailing_term_parser_coe : has_coe term_parser trailing_term_parser :=
|
||||
⟨λ x _, x⟩
|
||||
|
||||
local attribute [instance] name.has_lt_quick
|
||||
/-- A multimap indexed by tokens. Used for indexing parsers by their leading token. -/
|
||||
def token_map (α : Type) := rbmap name (list α) (<)
|
||||
|
||||
def token_map.insert {α : Type} (map : token_map α) (k : name) (v : α) : token_map α :=
|
||||
match map.find k with
|
||||
| none := map.insert k [v]
|
||||
| some vs := map.insert k (v::vs)
|
||||
|
||||
def token_map.of_list {α : Type} : list (name × α) → token_map α
|
||||
| [] := mk_rbmap _ _ _
|
||||
| (⟨k,v⟩::xs) := (token_map.of_list xs).insert k v
|
||||
|
||||
instance token_map_nil.tokens : parser.has_tokens $ @token_map.of_list ρ [] :=
|
||||
default _
|
||||
instance token_map_cons.tokens (k : name) (r : ρ) (rs : list (name × ρ)) [parser.has_tokens r] [parser.has_tokens $ token_map.of_list rs] :
|
||||
parser.has_tokens $ token_map.of_list ((k,r)::rs) :=
|
||||
⟨tokens r ++ tokens (token_map.of_list rs)⟩
|
||||
|
||||
-- This needs to be a separate structure since `term_parser`s cannot contain themselves in their config
|
||||
-- TODO: we need indexing here.
|
||||
structure command_parser_config extends parser_config :=
|
||||
(leading_term_parsers : list term_parser)
|
||||
(trailing_term_parsers : list trailing_term_parser)
|
||||
(leading_term_parsers : token_map term_parser)
|
||||
(trailing_term_parsers : token_map trailing_term_parser)
|
||||
|
||||
instance command_parser_config_coe_parser_config : has_coe command_parser_config parser_config :=
|
||||
⟨command_parser_config.to_parser_config⟩
|
||||
|
|
|
|||
|
|
@ -18,9 +18,7 @@ local notation `m` := rec_t nat syntax base_m
|
|||
local notation `parser` := m syntax
|
||||
|
||||
def curr_lbp : m nat :=
|
||||
do st ← get,
|
||||
-- suppress error messages
|
||||
except.ok tk ← (monad_lift $ observing $ lookahead token) <* put st | pure 0 <* put st,
|
||||
do except.ok tk ← monad_lift peek_token | pure 0,
|
||||
match tk with
|
||||
| syntax.atom ⟨_, sym⟩ := do
|
||||
cfg ← read,
|
||||
|
|
|
|||
|
|
@ -53,7 +53,11 @@ node! hole [hole: symbol "_" max_prec]
|
|||
|
||||
@[derive parser.has_tokens parser.has_view]
|
||||
def sort.parser : term_parser :=
|
||||
node_choice! sort {"Sort":max_prec, "Type":max_prec}
|
||||
node! sort ["Sort":max_prec]
|
||||
|
||||
@[derive parser.has_tokens parser.has_view]
|
||||
def type.parser : term_parser :=
|
||||
node! type ["Type":max_prec]
|
||||
|
||||
section binder
|
||||
@[derive has_tokens has_view]
|
||||
|
|
@ -289,27 +293,33 @@ node! anonymous_inaccessible ["._":max_prec]
|
|||
|
||||
-- TODO(Sebastian): replace with attribute
|
||||
@[derive has_tokens]
|
||||
def builtin_leading_parsers : list term_parser := [
|
||||
ident_univs.parser,
|
||||
number.parser,
|
||||
string_lit.parser,
|
||||
paren.parser,
|
||||
hole.parser,
|
||||
sort.parser,
|
||||
lambda.parser,
|
||||
pi.parser,
|
||||
anonymous_constructor.parser,
|
||||
explicit_ident.parser,
|
||||
let.parser,
|
||||
have.parser,
|
||||
show.parser,
|
||||
assume.parser,
|
||||
match.parser,
|
||||
if.parser,
|
||||
struct_inst.parser,
|
||||
subtype.parser,
|
||||
inaccessible.parser,
|
||||
anonymous_inaccessible.parser
|
||||
def builtin_leading_parsers : token_map term_parser := token_map.of_list [
|
||||
(ident.name, ident_univs.parser),
|
||||
(number.name, number.parser),
|
||||
(string_lit.name, string_lit.parser),
|
||||
("(", paren.parser),
|
||||
("_", hole.parser),
|
||||
("Sort", sort.parser),
|
||||
("Type", type.parser),
|
||||
("λ", lambda.parser),
|
||||
("fun", lambda.parser),
|
||||
("Π", pi.parser),
|
||||
("Pi", pi.parser),
|
||||
("∀", pi.parser),
|
||||
("forall", pi.parser),
|
||||
("⟨", anonymous_constructor.parser),
|
||||
("@", explicit_ident.parser),
|
||||
("@@", explicit_ident.parser),
|
||||
("let", let.parser),
|
||||
("have", have.parser),
|
||||
("show", show.parser),
|
||||
("assume", assume.parser),
|
||||
("match", match.parser),
|
||||
("if", if.parser),
|
||||
("{", struct_inst.parser),
|
||||
("{", subtype.parser),
|
||||
(".(", inaccessible.parser),
|
||||
("._", anonymous_inaccessible.parser)
|
||||
]
|
||||
|
||||
@[derive parser.has_tokens parser.has_view]
|
||||
|
|
@ -327,8 +337,6 @@ node! arrow [dom: get_leading, op: unicode_symbol "→" "->" 25, range: term.par
|
|||
|
||||
@[derive parser.has_view]
|
||||
def projection.parser : trailing_term_parser :=
|
||||
/- Use max_prec + 1 so that it bind more tightly than application:
|
||||
`a (b).c` should be parsed as `a ((b).c)`. -/
|
||||
try $ node! projection [
|
||||
term: get_leading,
|
||||
-- do not consume trailing whitespace
|
||||
|
|
@ -341,25 +349,28 @@ try $ node! projection [
|
|||
|
||||
-- register '.' manually because of `raw_str`
|
||||
instance projection.tokens : has_tokens projection.parser :=
|
||||
⟨[{«prefix» := ".", lbp := max_prec}]⟩
|
||||
/- Use max_prec + 1 so that it bind more tightly than application:
|
||||
`a (b).c` should be parsed as `a ((b).c)`. -/
|
||||
⟨[{«prefix» := ".", lbp := max_prec.succ}]⟩
|
||||
|
||||
@[derive has_tokens]
|
||||
def builtin_trailing_parsers : list trailing_term_parser := [
|
||||
arrow.parser,
|
||||
projection.parser
|
||||
def builtin_trailing_parsers : token_map trailing_term_parser := token_map.of_list [
|
||||
("→", arrow.parser),
|
||||
("->", arrow.parser),
|
||||
(".", projection.parser)
|
||||
]
|
||||
|
||||
end term
|
||||
|
||||
def term_parser.run (p : term_parser) : command_parser :=
|
||||
do cfg ← read,
|
||||
let trailing : trailing_term_parser := (longest_match cfg.trailing_term_parsers) <|>
|
||||
let trailing : trailing_term_parser := (indexed cfg.trailing_term_parsers >>= longest_match) <|>
|
||||
-- The application parsers should only be tried as a fall-back;
|
||||
-- e.g. `a + b` should not be parsed as `a (+ b)`.
|
||||
--TODO(Sebastian): We should be able to remove this workaround using
|
||||
-- the proposed more robust precedence handling
|
||||
any_of [term.sort_app.parser, term.app.parser],
|
||||
adapt_reader coe $ pratt_parser (longest_match cfg.leading_term_parsers) trailing p
|
||||
adapt_reader coe $ pratt_parser (indexed cfg.leading_term_parsers >>= longest_match) trailing p
|
||||
|
||||
end parser
|
||||
end lean
|
||||
|
|
|
|||
|
|
@ -185,6 +185,10 @@ do it ← left_over,
|
|||
pure tk
|
||||
)
|
||||
|
||||
def peek_token : basic_parser_m (except (parsec.message syntax) syntax) :=
|
||||
do st ← get,
|
||||
observing (try (lookahead token)) <* put st
|
||||
|
||||
variable [monad_basic_parser m]
|
||||
|
||||
def symbol (sym : string) (lbp := 0) : parser :=
|
||||
|
|
@ -303,5 +307,14 @@ lift $ any_of [symbol unicode lbp, symbol ascii lbp]
|
|||
-- use unicode variant by default
|
||||
instance unicode_symbol.view_default (u a lbp) : parser.has_view_default (unicode_symbol u a lbp : parser) _ (syntax.atom ⟨none, u⟩) := ⟨⟩
|
||||
|
||||
def indexed {α : Type} (map : token_map α) : m (list α) :=
|
||||
lift $ do
|
||||
except.ok tk ← peek_token | error "",
|
||||
n ← match tk with
|
||||
| syntax.atom ⟨_, s⟩ := pure $ mk_simple_name s
|
||||
| syntax.node ⟨some k, _⟩ := pure k.name
|
||||
| _ := error "",
|
||||
option.to_monad $ map.find n
|
||||
|
||||
end «parser»
|
||||
end lean
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue