We are going to start making drastic changes in the parser, elaborator, attributes, etc. Examples: - No View objects. I am going to implement match_syntax. - No RecT in the parser. I am going to implement parser extensions using an approach similar to the one I used to implement environment extensions. - No Parsec. I will use an approach similar to the one used in the experiment https://github.com/leanprover/lean4/tree/master/tests/playground/parser It is easier to perform these changes with the new frontend disabled. I will slowly re-active it as I apply the changes. cc @kha
226 lines
9.3 KiB
Text
226 lines
9.3 KiB
Text
/-
|
||
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Sebastian Ullrich
|
||
|
||
Parser for the Lean language
|
||
-/
|
||
prelude
|
||
import init.lean.parser.parsec init.lean.parser.syntax init.lean.parser.rec
|
||
import init.lean.parser.trie
|
||
import init.lean.parser.identifier init.data.rbmap init.lean.message
|
||
|
||
namespace Lean
|
||
namespace Parser
|
||
|
||
/- Maximum standard precedence. This is the precedence of Function application.
|
||
In the standard Lean language, only the token `.` has a left-binding power greater
|
||
than `maxPrec` (so that field accesses like `g (h x).f` are parsed as `g ((h x).f)`,
|
||
not `(g (h x)).f`). -/
|
||
def maxPrec : Nat := 1024
|
||
|
||
structure TokenConfig :=
|
||
(«prefix» : String)
|
||
/- Left-binding power used by the Term Parser. The Term Parser operates in the context
|
||
of a right-binding power between 0 (used by parentheses and on the top-Level) and
|
||
(usually) `maxPrec` (used by Function application). After parsing an initial Term,
|
||
it continues parsing and expanding that Term only when the left-binding power of
|
||
the next token is greater than the current right-binding power. For example, it
|
||
never continues parsing an argument after the initial parse, unless a token with
|
||
lbp > maxPrec is encountered. Conversely, the Term Parser will always continue
|
||
parsing inside parentheses until it finds a token with lbp 0 (such as `)`). -/
|
||
(lbp : Nat := 0)
|
||
-- reading a token should not need any State
|
||
/- An optional Parser that is activated after matching `prefix`.
|
||
It should return a Syntax tree with a "hole" for the
|
||
`SourceInfo` surrounding the token, which will be supplied
|
||
by the `token` Parser.
|
||
|
||
Remark: `suffixParser` has many applications for example for parsing
|
||
hexdecimal numbers, `prefix` is `0x` and `suffixParser` is the Parser `digit*`.
|
||
We also use it to parse String literals: here `prefix` is just `"`.
|
||
-/
|
||
(suffixParser : Option (Parsec' (SourceInfo → Syntax)) := none)
|
||
|
||
-- Backtrackable State
|
||
structure ParserState :=
|
||
(messages : MessageLog)
|
||
|
||
structure TokenCacheEntry :=
|
||
(startIt stopIt : String.OldIterator)
|
||
(tk : Syntax)
|
||
|
||
-- Non-backtrackable State
|
||
structure ParserCache :=
|
||
(tokenCache : Option TokenCacheEntry := none)
|
||
-- for profiling
|
||
(hit miss : Nat := 0)
|
||
|
||
structure FrontendConfig :=
|
||
(filename : String)
|
||
(input : String)
|
||
(fileMap : FileMap)
|
||
|
||
/- Remark: if we have a Node in the Trie with `some TokenConfig`, the String induced by the path is equal to the `TokenConfig.prefix`. -/
|
||
structure ParserConfig extends FrontendConfig :=
|
||
(tokens : Trie TokenConfig)
|
||
|
||
instance parserConfigCoe : HasCoe ParserConfig FrontendConfig :=
|
||
⟨ParserConfig.toFrontendConfig⟩
|
||
|
||
@[derive Monad Alternative MonadParsec MonadExcept]
|
||
def parserCoreT (m : Type → Type) [Monad m] :=
|
||
ParsecT Syntax $ StateT ParserCache $ m
|
||
|
||
@[derive Monad Alternative MonadReader MonadParsec MonadExcept]
|
||
def ParserT (ρ : Type) (m : Type → Type) [Monad m] := ReaderT ρ $ parserCoreT m
|
||
@[derive Monad Alternative MonadReader MonadParsec MonadExcept]
|
||
def BasicParserM := ParserT ParserConfig Id
|
||
abbrev basicParser := BasicParserM Syntax
|
||
abbrev monadBasicParser := HasMonadLiftT BasicParserM
|
||
|
||
section
|
||
local attribute [reducible] BasicParserM ParserT parserCoreT
|
||
@[inline] def getCache : BasicParserM ParserCache :=
|
||
monadLift (get : StateT ParserCache Id _)
|
||
|
||
@[inline] def putCache : ParserCache → BasicParserM PUnit :=
|
||
λ c, monadLift (set c : StateT ParserCache Id _)
|
||
end
|
||
|
||
-- an arbitrary `Parser` Type; parsers are usually some Monad stack based on `BasicParserM` returning `Syntax`
|
||
variable {ρ : Type}
|
||
|
||
class HasTokens (r : ρ) := mk {} ::
|
||
(tokens : List TokenConfig)
|
||
|
||
@[noinline, nospecialize] def tokens (r : ρ) [HasTokens r] :=
|
||
HasTokens.tokens r
|
||
|
||
instance HasTokens.Inhabited (r : ρ) : Inhabited (HasTokens r) :=
|
||
⟨⟨[]⟩⟩
|
||
|
||
instance List.nil.tokens : Parser.HasTokens ([] : List ρ) :=
|
||
default _
|
||
|
||
instance List.cons.tokens (r : ρ) (rs : List ρ) [Parser.HasTokens r] [Parser.HasTokens rs] :
|
||
Parser.HasTokens (r::rs) :=
|
||
⟨tokens r ++ tokens rs⟩
|
||
|
||
class HasView (α : outParam Type) (r : ρ) :=
|
||
(view : Syntax → α)
|
||
(review : α → Syntax)
|
||
|
||
export HasView (view review)
|
||
|
||
def tryView {α : Type} (k : SyntaxNodeKind) [HasView α k] (stx : Syntax) : Option α :=
|
||
if stx.isOfKind k then some (HasView.view k stx) else none
|
||
|
||
instance HasView.default (r : ρ) : Inhabited (Parser.HasView Syntax r) :=
|
||
⟨{ view := id, review := id }⟩
|
||
|
||
class HasViewDefault (r : ρ) (α : outParam Type) [HasView α r] (default : α) := mk {}
|
||
|
||
def messageOfParsecMessage {μ : Type} (cfg : FrontendConfig) (msg : Parsec.Message μ) : Message :=
|
||
{filename := cfg.filename, pos := cfg.fileMap.toPosition msg.it.offset, text := msg.text}
|
||
|
||
/-- Run Parser stack, returning a partial Syntax tree in case of a fatal error -/
|
||
protected def run {m : Type → Type} {α ρ : Type} [Monad m] [HasCoeT ρ FrontendConfig] (cfg : ρ) (s : String) (r : StateT ParserState (ParserT ρ m) α) :
|
||
m (Sum α Syntax × MessageLog) :=
|
||
do (r, _) ← (((r.run {messages:=MessageLog.empty}).run cfg).parse s).run {},
|
||
pure $ match r with
|
||
| Except.ok (a, st) := (Sum.inl a, st.messages)
|
||
| Except.error msg := (Sum.inr msg.custom.get, MessageLog.empty.add (messageOfParsecMessage cfg msg))
|
||
|
||
open MonadParsec
|
||
open Parser.HasView
|
||
variables {α : Type} {m : Type → Type}
|
||
local notation `Parser` := m Syntax
|
||
|
||
def logMessage {μ : Type} [Monad m] [MonadReader ρ m] [HasLiftT ρ FrontendConfig] [MonadState ParserState m]
|
||
(msg : Parsec.Message μ) : m Unit :=
|
||
do cfg ← read,
|
||
modify (λ st, {st with messages := st.messages.add (messageOfParsecMessage ↑cfg msg)})
|
||
|
||
def mkTokenTrie (tokens : List TokenConfig) : Except String (Trie TokenConfig) :=
|
||
do -- the only hardcoded tokens, because they are never directly mentioned by a `Parser`
|
||
let builtinTokens : List TokenConfig := [{«prefix» := "/-"}, {«prefix» := "--"}],
|
||
t ← (builtinTokens ++ tokens).mfoldl (λ (t : Trie TokenConfig) tk,
|
||
match t.find tk.prefix with
|
||
| some tk' := match tk.lbp, tk'.lbp with
|
||
| l, 0 := pure $ t.insert tk.prefix tk
|
||
| 0, _ := pure t
|
||
| l, l' := if l = l' then pure t else throw $
|
||
"invalid token '" ++ tk.prefix ++ "', has been defined with precedences " ++
|
||
toString l ++ " and " ++ toString l'
|
||
| none := pure $ t.insert tk.prefix tk)
|
||
Trie.empty,
|
||
pure t
|
||
|
||
|
||
/- Monad stacks used in multiple files -/
|
||
|
||
/- NOTE: We move `RecT` under `ParserT`'s `ReaderT` so that `termParser`, which does not
|
||
have access to `commandParser`'s ρ (=`CommandParserConfig`) can still recurse into it
|
||
(for command quotations). This means that the `CommandParserConfig` will be reset
|
||
on a recursive call to `command.Parser`, i.e. it forgets about locally registered parsers,
|
||
but that's not an issue for our intended uses of it. -/
|
||
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec]
|
||
def CommandParserM (ρ : Type) := ReaderT ρ $ RecT Unit Syntax $ parserCoreT Id
|
||
|
||
section
|
||
local attribute [reducible] ParserT CommandParserM
|
||
instance CommandParserM.MonadReaderAdapter (ρ ρ' : Type) :
|
||
MonadReaderAdapter ρ ρ' (CommandParserM ρ) (CommandParserM ρ') :=
|
||
inferInstance
|
||
instance CommandParserM.basicParser (ρ : Type) [HasLiftT ρ ParserConfig] : monadBasicParser (CommandParserM ρ) :=
|
||
⟨λ _ x cfg rec, x.run ↑cfg⟩
|
||
end
|
||
|
||
/- The `Nat` at `RecT` is the lbp` -/
|
||
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec monadBasicParser]
|
||
def TermParserM := RecT Nat Syntax $ CommandParserM ParserConfig
|
||
abbrev termParser := TermParserM Syntax
|
||
|
||
/-- A Term Parser for a suffix or infix notation that accepts a preceding Term. -/
|
||
@[derive Monad Alternative MonadReader MonadParsec MonadExcept MonadRec monadBasicParser]
|
||
def TrailingTermParserM := ReaderT Syntax TermParserM
|
||
abbrev trailingTermParser := TrailingTermParserM Syntax
|
||
|
||
instance trailingTermParserCoe : HasCoe termParser trailingTermParser :=
|
||
⟨λ x _, x⟩
|
||
|
||
/-- A multimap indexed by tokens. Used for indexing parsers by their leading token. -/
|
||
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
|
||
|
||
def TokenMap.insert {α : Type} (map : TokenMap α) (k : Name) (v : α) : TokenMap α :=
|
||
match map.find k with
|
||
| none := map.insert k [v]
|
||
| some vs := map.insert k (v::vs)
|
||
|
||
def TokenMap.ofList {α : Type} : List (Name × α) → TokenMap α
|
||
| [] := mkRBMap _ _ _
|
||
| (⟨k,v⟩::xs) := (TokenMap.ofList xs).insert k v
|
||
|
||
instance tokenMapNil.tokens : Parser.HasTokens $ @TokenMap.ofList ρ [] :=
|
||
default _
|
||
|
||
instance tokenMapCons.tokens (k : Name) (r : ρ) (rs : List (Name × ρ)) [Parser.HasTokens r] [Parser.HasTokens $ TokenMap.ofList rs] :
|
||
Parser.HasTokens $ TokenMap.ofList ((k,r)::rs) :=
|
||
⟨tokens r ++ tokens (TokenMap.ofList rs)⟩
|
||
|
||
-- This needs to be a separate structure since `termParser`s cannot contain themselves in their config
|
||
structure CommandParserConfig extends ParserConfig :=
|
||
(leadingTermParsers : TokenMap termParser)
|
||
(trailingTermParsers : TokenMap trailingTermParser)
|
||
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
|
||
(localLeadingTermParsers : TokenMap termParser := mkRBMap _ _ _)
|
||
(localTrailingTermParsers : TokenMap trailingTermParser := mkRBMap _ _ _)
|
||
|
||
instance commandParserConfigCoeParserConfig : HasCoe CommandParserConfig ParserConfig :=
|
||
⟨CommandParserConfig.toParserConfig⟩
|
||
|
||
abbrev commandParser := CommandParserM CommandParserConfig Syntax
|
||
|
||
end «Parser»
|
||
end Lean
|