lean4-htt/library/init/lean/parser/basic.lean

226 lines
9.3 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
Parser for the Lean language
-/
prelude
import init.lean.parser.parsec init.lean.parser.syntax init.lean.parser.rec
import init.lean.parser.trie
import init.lean.parser.identifier init.data.rbmap init.lean.message
namespace Lean
namespace Parser
/- Maximum standard precedence. This is the precedence of Function application.
In the standard Lean language, only the token `.` has a left-binding power greater
than `maxPrec` (so that field accesses like `g (h x).f` are parsed as `g ((h x).f)`,
not `(g (h x)).f`). -/
def maxPrec : Nat := 1024
structure TokenConfig :=
(«prefix» : String)
/- Left-binding power used by the Term Parser. The Term Parser operates in the context
of a right-binding power between 0 (used by parentheses and on the top-Level) and
(usually) `maxPrec` (used by Function application). After parsing an initial Term,
it continues parsing and expanding that Term only when the left-binding power of
the next token is greater than the current right-binding power. For example, it
never continues parsing an argument after the initial parse, unless a token with
lbp > maxPrec is encountered. Conversely, the Term Parser will always continue
parsing inside parentheses until it finds a token with lbp 0 (such as `)`). -/
(lbp : Nat := 0)
-- reading a token should not need any State
/- An optional Parser that is activated after matching `prefix`.
It should return a Syntax tree with a "hole" for the
`SourceInfo` surrounding the token, which will be supplied
by the `token` Parser.
Remark: `suffixParser` has many applications for example for parsing
hexdecimal numbers, `prefix` is `0x` and `suffixParser` is the Parser `digit*`.
We also use it to parse String literals: here `prefix` is just `"`.
-/
(suffixParser : Option (Parsec' (SourceInfo → Syntax)) := none)
-- Backtrackable State
structure ParserState :=
(messages : MessageLog)
structure TokenCacheEntry :=
(startIt stopIt : String.OldIterator)
(tk : Syntax)
-- Non-backtrackable State
structure ParserCache :=
(tokenCache : Option TokenCacheEntry := none)
-- for profiling
(hit miss : Nat := 0)
structure FrontendConfig :=
(filename : String)
(input : String)
(fileMap : FileMap)
/- Remark: if we have a Node in the Trie with `some TokenConfig`, the String induced by the path is equal to the `TokenConfig.prefix`. -/
structure ParserConfig extends FrontendConfig :=
(tokens : Trie TokenConfig)
instance parserConfigCoe : HasCoe ParserConfig FrontendConfig :=
⟨ParserConfig.toFrontendConfig⟩
@[derive Monad Alternative MonadParsec MonadExcept]
def parserCoreT (m : Type → Type) [Monad m] :=
ParsecT Syntax $ StateT ParserCache $ m
@[derive Monad Alternative MonadReader MonadParsec MonadExcept]
def ParserT (ρ : Type) (m : Type → Type) [Monad m] := ReaderT ρ $ parserCoreT m
@[derive Monad Alternative MonadReader MonadParsec MonadExcept]
def BasicParserM := ParserT ParserConfig Id
abbrev basicParser := BasicParserM Syntax
abbrev monadBasicParser := HasMonadLiftT BasicParserM
section
local attribute [reducible] BasicParserM ParserT parserCoreT
@[inline] def getCache : BasicParserM ParserCache :=
monadLift (get : StateT ParserCache Id _)
@[inline] def putCache : ParserCache → BasicParserM PUnit :=
λ c, monadLift (set c : StateT ParserCache Id _)
end
-- an arbitrary `Parser` Type; parsers are usually some Monad stack based on `BasicParserM` returning `Syntax`
variable {ρ : Type}
class HasTokens (r : ρ) := mk {} ::
(tokens : List TokenConfig)
@[noinline, nospecialize] def tokens (r : ρ) [HasTokens r] :=
HasTokens.tokens r
instance HasTokens.Inhabited (r : ρ) : Inhabited (HasTokens r) :=
⟨⟨[]⟩⟩
instance List.nil.tokens : Parser.HasTokens ([] : List ρ) :=
default _
instance List.cons.tokens (r : ρ) (rs : List ρ) [Parser.HasTokens r] [Parser.HasTokens rs] :
Parser.HasTokens (r::rs) :=
⟨tokens r ++ tokens rs⟩
class HasView (α : outParam Type) (r : ρ) :=
(view : Syntax → α)
(review : α → Syntax)
export HasView (view review)
def tryView {α : Type} (k : SyntaxNodeKind) [HasView α k] (stx : Syntax) : Option α :=
if stx.isOfKind k then some (HasView.view k stx) else none
instance HasView.default (r : ρ) : Inhabited (Parser.HasView Syntax r) :=
⟨{ view := id, review := id }⟩
class HasViewDefault (r : ρ) (α : outParam Type) [HasView α r] (default : α) := mk {}
def messageOfParsecMessage {μ : Type} (cfg : FrontendConfig) (msg : Parsec.Message μ) : Message :=
{filename := cfg.filename, pos := cfg.fileMap.toPosition msg.it.offset, text := msg.text}
/-- Run Parser stack, returning a partial Syntax tree in case of a fatal error -/
protected def run {m : Type → Type} {α ρ : Type} [Monad m] [HasCoeT ρ FrontendConfig] (cfg : ρ) (s : String) (r : StateT ParserState (ParserT ρ m) α) :
m (Sum α Syntax × MessageLog) :=
do (r, _) ← (((r.run {messages:=MessageLog.empty}).run cfg).parse s).run {},
pure $ match r with
| Except.ok (a, st) := (Sum.inl a, st.messages)
| Except.error msg := (Sum.inr msg.custom.get, MessageLog.empty.add (messageOfParsecMessage cfg msg))
open MonadParsec
open Parser.HasView
variables {α : Type} {m : Type → Type}
local notation `Parser` := m Syntax
def logMessage {μ : Type} [Monad m] [MonadReader ρ m] [HasLiftT ρ FrontendConfig] [MonadState ParserState m]
(msg : Parsec.Message μ) : m Unit :=
do cfg ← read,
modify (λ st, {st with messages := st.messages.add (messageOfParsecMessage ↑cfg msg)})
def mkTokenTrie (tokens : List TokenConfig) : Except String (Trie TokenConfig) :=
do -- the only hardcoded tokens, because they are never directly mentioned by a `Parser`
let builtinTokens : List TokenConfig := [{«prefix» := "/-"}, {«prefix» := "--"}],
t ← (builtinTokens ++ tokens).mfoldl (λ (t : Trie TokenConfig) tk,
match t.find tk.prefix with
| some tk' := (match tk.lbp, tk'.lbp with
| l, 0 := pure $ t.insert tk.prefix tk
| 0, _ := pure t
| l, l' := if l = l' then pure t else throw $
"invalid token '" ++ tk.prefix ++ "', has been defined with precedences " ++
toString l ++ " and " ++ toString l')
| none := pure $ t.insert tk.prefix tk)
Trie.empty,
pure t
/- Monad stacks used in multiple files -/
/- NOTE: We move `RecT` under `ParserT`'s `ReaderT` so that `termParser`, which does not
have access to `commandParser`'s ρ (=`CommandParserConfig`) can still recurse into it
(for command quotations). This means that the `CommandParserConfig` will be reset
on a recursive call to `command.Parser`, i.e. it forgets about locally registered parsers,
but that's not an issue for our intended uses of it. -/
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec]
def CommandParserM (ρ : Type) := ReaderT ρ $ RecT Unit Syntax $ parserCoreT Id
section
local attribute [reducible] ParserT CommandParserM
instance CommandParserM.MonadReaderAdapter (ρ ρ' : Type) :
MonadReaderAdapter ρ ρ' (CommandParserM ρ) (CommandParserM ρ') :=
inferInstance
instance CommandParserM.basicParser (ρ : Type) [HasLiftT ρ ParserConfig] : monadBasicParser (CommandParserM ρ) :=
⟨λ _ x cfg rec, x.run ↑cfg⟩
end
/- The `Nat` at `RecT` is the lbp` -/
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec monadBasicParser]
def TermParserM := RecT Nat Syntax $ CommandParserM ParserConfig
abbrev termParser := TermParserM Syntax
/-- A Term Parser for a suffix or infix notation that accepts a preceding Term. -/
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec monadBasicParser]
def TrailingTermParserM := ReaderT Syntax TermParserM
abbrev trailingTermParser := TrailingTermParserM Syntax
instance trailingTermParserCoe : HasCoe termParser trailingTermParser :=
⟨λ x _, x⟩
/-- A multimap indexed by tokens. Used for indexing parsers by their leading token. -/
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
def TokenMap.insert {α : Type} (map : TokenMap α) (k : Name) (v : α) : TokenMap α :=
match map.find k with
| none := map.insert k [v]
| some vs := map.insert k (v::vs)
def TokenMap.ofList {α : Type} : List (Name × α) → TokenMap α
| [] := mkRBMap _ _ _
| (⟨k,v⟩::xs) := (TokenMap.ofList xs).insert k v
instance tokenMapNil.tokens : Parser.HasTokens $ @TokenMap.ofList ρ [] :=
default _
instance tokenMapCons.tokens (k : Name) (r : ρ) (rs : List (Name × ρ)) [Parser.HasTokens r] [Parser.HasTokens $ TokenMap.ofList rs] :
Parser.HasTokens $ TokenMap.ofList ((k,r)::rs) :=
⟨tokens r ++ tokens (TokenMap.ofList rs)⟩
-- This needs to be a separate structure since `termParser`s cannot contain themselves in their config
structure CommandParserConfig extends ParserConfig :=
(leadingTermParsers : TokenMap termParser)
(trailingTermParsers : TokenMap trailingTermParser)
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
(localLeadingTermParsers : TokenMap termParser := mkRBMap _ _ _)
(localTrailingTermParsers : TokenMap trailingTermParser := mkRBMap _ _ _)
instance commandParserConfigCoeParserConfig : HasCoe CommandParserConfig ParserConfig :=
⟨CommandParserConfig.toParserConfig⟩
abbrev commandParser := CommandParserM CommandParserConfig Syntax
end «Parser»
end Lean