test(tests/playground/parser): add longestMatch and other helper functions
This commit is contained in:
parent
3d8c3d5789
commit
5188adc685
3 changed files with 201 additions and 18 deletions
|
|
@ -79,6 +79,10 @@ def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
|
|||
match d with
|
||||
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
|
||||
|
||||
def ParserData.shrinkStack (d : ParserData) (iniStackSz : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, msg⟩ := ⟨stack.shrink iniStackSz, pos, cache, msg⟩
|
||||
|
||||
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
|
||||
d.setPos (s.next pos)
|
||||
|
||||
|
|
@ -604,16 +608,21 @@ abbrev BasicParser : Type := AbsParser BasicParserFn
|
|||
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
|
||||
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
|
||||
abbrev TermParser : Type := AbsParser TermParserFn
|
||||
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
|
||||
abbrev TrailingTermParserFn : Type := EnvParserFn Syntax TermParserFn
|
||||
abbrev TrailingTermParser : Type := AbsParser TrailingTermParserFn
|
||||
|
||||
structure TermParsingTables :=
|
||||
(leadingTermParsers : TokenMap TermParserFn)
|
||||
(trailingTermParsers : TokenMap TrailingTermParserFn)
|
||||
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
|
||||
(localLeadingTermParsers : TokenMap TermParserFn := RBMap.empty)
|
||||
(localTrailingTermParsers : TokenMap TrailingTermParserFn := RBMap.empty)
|
||||
|
||||
structure CommandParserConfig extends ParserConfig :=
|
||||
(leadingTermParsers : TokenMap TermParser)
|
||||
(trailingTermParsers : TokenMap TrailingTermParser)
|
||||
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
|
||||
(localLeadingTermParsers : TokenMap TermParser := RBMap.empty)
|
||||
(localTrailingTermParsers : TokenMap TrailingTermParser := RBMap.empty)
|
||||
(pTables : TermParsingTables)
|
||||
|
||||
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
|
||||
abbrev CommandParserFn : Type := CmdParserFn CommandParserConfig
|
||||
abbrev CommandParser : Type := AbsParser CommandParserFn
|
||||
|
||||
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
|
||||
{ fn := RecParserFn.recurse rbp }
|
||||
|
|
@ -735,6 +744,17 @@ def strLitFn : BasicParserFn
|
|||
@[inline] def strLit : BasicParser :=
|
||||
{ fn := numberFn }
|
||||
|
||||
def identFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError || !(d.stxStack.back.isIdent) then
|
||||
d.mkError "expected identifier"
|
||||
else
|
||||
d
|
||||
|
||||
@[inline] def ident : BasicParser :=
|
||||
{ fn := identFn }
|
||||
|
||||
instance string2basic : HasCoe String BasicParser :=
|
||||
⟨symbol⟩
|
||||
|
||||
|
|
@ -754,5 +774,119 @@ if d.hasError then
|
|||
else
|
||||
Except.ok d.stxStack.back
|
||||
|
||||
-- Helper function for testing (non-recursive) term parsers
|
||||
def TermParser.test (p : TermParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
|
||||
let frontendCfg := mkFrontendConfig filename input in
|
||||
let tokens := p.info.updateTokens {} in
|
||||
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
|
||||
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
|
||||
let dummyCmdParser : Unit → ParserFn := λ _ _ d, d.mkError "no command parser" in
|
||||
let dummyTermParser : ℕ → CmdParserFn ParserConfig := λ _ _ _ _ d, d.mkError "no term parser" in
|
||||
let d := p.fn dummyTermParser cfg dummyCmdParser input d in
|
||||
if d.hasError then
|
||||
Except.error (d.toErrorMsg cfg)
|
||||
else
|
||||
Except.ok d.stxStack.back
|
||||
|
||||
def ParserData.keepNewError (d : ParserData) (oldStackSize : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, err⟩ := ⟨stack.shrink oldStackSize, pos, cache, err⟩
|
||||
|
||||
def ParserData.keepPrevError (d : ParserData) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option String) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink oldStackSize, oldStopPos, cache, oldError⟩
|
||||
|
||||
def ParserData.mergeErrors (d : ParserData) (oldStackSize : Nat) (oldError : String) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, some err⟩ := ⟨stack.shrink oldStackSize, pos, cache, some (err ++ "; " ++ oldError)⟩
|
||||
| other := other
|
||||
|
||||
def ParserData.mkLongestNodeAlt (d : ParserData) (startSize : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, _⟩ :=
|
||||
if stack.size == startSize then ⟨stack.push Syntax.missing, pos, cache, none⟩ -- parser did not create any node, then we just add `Syntax.missing`
|
||||
else if stack.size == startSize + 1 then d
|
||||
else
|
||||
-- parser created more than one node, combine them into a single node
|
||||
let node := Syntax.node nullKind (stack.extract startSize stack.size) [] in
|
||||
let stack := stack.shrink startSize in
|
||||
⟨stack.push node, pos, cache, none⟩
|
||||
|
||||
def ParserData.keepLatest (d : ParserData) (startStackSize : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, _⟩ :=
|
||||
let node := stack.back in
|
||||
let stack := stack.shrink startStackSize in
|
||||
let stack := stack.push node in
|
||||
⟨stack, pos, cache, none⟩
|
||||
|
||||
def ParserData.replaceLongest (d : ParserData) (startStackSize : Nat) (prevStackSize : Nat) : ParserData :=
|
||||
let d := d.mkLongestNodeAlt prevStackSize in
|
||||
d.keepLatest startStackSize
|
||||
|
||||
def longestMatchFnAux (startSize : Nat) (startPos : String.Pos) : List TermParserFn → TermParserFn
|
||||
| [] := λ _ _ _ _ d, if !d.hasError && d.stackSize > startSize + 1 then d.mkNode choiceKind startSize else d
|
||||
| (p::ps) := λ tp cfg cp s d,
|
||||
let prevErrorMsg := d.errorMsg in
|
||||
let prevStopPos := d.pos in
|
||||
let prevSize := d.stackSize in
|
||||
let d := d.restore prevSize startPos in
|
||||
let d := p tp cfg cp s d in
|
||||
match prevErrorMsg, d.errorMsg with
|
||||
| none, none := -- both succeeded
|
||||
let d :=
|
||||
if d.pos > prevStopPos then d.replaceLongest startSize prevSize -- replace
|
||||
else if d.pos < prevStopPos then d.restore prevSize prevStopPos -- keep prev
|
||||
else d.mkLongestNodeAlt prevSize in -- keep both
|
||||
longestMatchFnAux ps tp cfg cp s d
|
||||
| none, some _ := -- prev succeeded, current failed
|
||||
let d := d.restore prevSize prevStopPos in
|
||||
longestMatchFnAux ps tp cfg cp s d
|
||||
| some oldError, some _ := -- both failed
|
||||
let d :=
|
||||
if d.pos > prevStopPos then d.keepNewError prevSize
|
||||
else if d.pos < prevStopPos then d.keepPrevError prevSize prevStopPos prevErrorMsg
|
||||
else d.mergeErrors prevSize oldError in
|
||||
longestMatchFnAux ps tp cfg cp s d
|
||||
| some _, none := -- prev failed, current succeeded
|
||||
let d := d.mkLongestNodeAlt startSize in
|
||||
longestMatchFnAux ps tp cfg cp s d
|
||||
|
||||
def longestMatchFn : List TermParserFn → TermParserFn
|
||||
| [] := λ _ _ _ _ d, d.mkError "longest match: empty list"
|
||||
| [p] := λ tp cfg cp s d,
|
||||
let startSize := d.stackSize in
|
||||
let d := p tp cfg cp s d in
|
||||
if d.hasError then d else d.mkLongestNodeAlt startSize
|
||||
| (p::ps) := λ tp cfg cp s d,
|
||||
let startSize := d.stackSize in
|
||||
let startPos := d.pos in
|
||||
let d := p tp cfg cp s d in
|
||||
if d.hasError then
|
||||
let d := d.shrinkStack startSize in
|
||||
longestMatchFnAux startSize startPos ps tp cfg cp s d
|
||||
else
|
||||
let d := d.mkLongestNodeAlt startSize in
|
||||
longestMatchFnAux startSize startPos ps tp cfg cp s d
|
||||
|
||||
-- Helper function for testing longestMatchFn, we don't use this function directly
|
||||
def longestMatch (ps : List TermParser) : TermParser :=
|
||||
{ info := { updateTokens := λ trie, ps.foldl (λ trie p, p.info.updateTokens trie) trie,
|
||||
firstTokens := ps.foldl (λ tks p, p.info.firstTokens ++ tks) [] },
|
||||
fn := longestMatchFn (ps.map (λ p, p.fn)) }
|
||||
|
||||
-- Stopped here
|
||||
|
||||
@[noinline] def termPrattParser (tbl : TermParsingTables) (rbp : Nat) : TermParserFn :=
|
||||
λ g, g 0 -- TODO(Leo)
|
||||
|
||||
@[specialize] def TermParserFn.run (p : TermParserFn) : CommandParserFn
|
||||
| cfg r s d :=
|
||||
let p := RecParserFn.run p (termPrattParser cfg.pTables) in
|
||||
p cfg.toParserConfig r s d
|
||||
|
||||
def parseExpr (rbp : Nat) : CommandParserFn :=
|
||||
TermParserFn.run (RecParserFn.recurse rbp)
|
||||
|
||||
end Parser
|
||||
-- end Lean
|
||||
|
|
|
|||
|
|
@ -131,6 +131,10 @@ def MacroScopes.flip : MacroScopes → MacroScopes → MacroScopes
|
|||
| [] := [x]
|
||||
|
||||
namespace Syntax
|
||||
def isIdent : Syntax → Bool
|
||||
| (Syntax.ident _ _ _ _ _) := true
|
||||
| _ := false
|
||||
|
||||
def isOfKind : Syntax → SyntaxNodeKind → Bool
|
||||
| (Syntax.node kind _ _) k := k == kind
|
||||
| other _ := false
|
||||
|
|
|
|||
|
|
@ -9,32 +9,77 @@ def mkNumPairKind : IO SyntaxNodeKind := nextKind `numPair
|
|||
def mkNumSetKind : IO SyntaxNodeKind := nextKind `numSet
|
||||
@[init mkNumSetKind] constant numSetKind : SyntaxNodeKind := default _
|
||||
|
||||
def mkParenIdentKind : IO SyntaxNodeKind := nextKind `parenIdent
|
||||
@[init mkParenIdentKind] constant parenIdentKind : SyntaxNodeKind := default _
|
||||
|
||||
def mkQualifiedKind : IO SyntaxNodeKind := nextKind `qualified
|
||||
@[init mkQualifiedKind] constant qualifiedKind : SyntaxNodeKind := default _
|
||||
|
||||
def mkSeqKind : IO SyntaxNodeKind := nextKind `seq
|
||||
@[init mkSeqKind] constant seqKind : SyntaxNodeKind := default _
|
||||
|
||||
def mkSetCompKind : IO SyntaxNodeKind := nextKind `setComp
|
||||
@[init mkSetCompKind] constant setCompKind : SyntaxNodeKind := default _
|
||||
|
||||
-- set_option pp.implicit true
|
||||
-- set_option trace.compiler.boxed true
|
||||
|
||||
def numPair : BasicParser :=
|
||||
def numPairP : TermParser :=
|
||||
node numPairKind $
|
||||
"("; number; ","; number; ")"
|
||||
|
||||
def numSet : BasicParser :=
|
||||
def numSetP : TermParser :=
|
||||
node numSetKind $
|
||||
"{"; sepBy number ","; "}"
|
||||
|
||||
def testParser : BasicParser :=
|
||||
many (numPair || numSet)
|
||||
def testParser1 : TermParser :=
|
||||
many (numPairP || numSetP)
|
||||
|
||||
@[noinline] def test (p : BasicParser) (s : String) : IO Unit :=
|
||||
match p.run s with
|
||||
def parenIdentP : TermParser :=
|
||||
node parenIdentKind $
|
||||
"("; ident; ")"
|
||||
|
||||
def qualifiedP : TermParser :=
|
||||
node qualifiedKind $
|
||||
"("; ident; ":"; ident; ")"
|
||||
|
||||
def seqP : TermParser :=
|
||||
node seqKind $
|
||||
"("; sepBy ident ":"; ")"
|
||||
|
||||
def setCompP : TermParser :=
|
||||
node setCompKind $
|
||||
"{"; ident; ":"; ident; "|"; ident; "}"
|
||||
|
||||
def testParser2 : TermParser :=
|
||||
many (longestMatch [qualifiedP, parenIdentP, seqP, setCompP])
|
||||
|
||||
@[noinline] def test (p : TermParser) (s : String) : IO Unit :=
|
||||
match p.test s with
|
||||
| Except.error msg := IO.println msg
|
||||
| Except.ok stx := IO.println "ok" -- IO.println stx
|
||||
| Except.ok stx := IO.println stx -- IO.println "ok"
|
||||
|
||||
def mkNumPairString : Nat → String → String
|
||||
def mkTestString1 : Nat → String → String
|
||||
| 0 s := s
|
||||
| (n+1) s := mkNumPairString n $
|
||||
| (n+1) s := mkTestString1 n $
|
||||
s ++ "{} /- /- comment2 -/ -/ "
|
||||
++ "{" ++ toString n ++ "," ++ toString (n+1) ++ ", " ++ toString (n+2) ++ "}"
|
||||
++ "(" ++ toString n ++ ", " ++ toString n ++ ") -- comment\n"
|
||||
|
||||
def test1 (n : Nat) : IO Unit :=
|
||||
let s := mkTestString1 n "" in
|
||||
test testParser1 s
|
||||
|
||||
def mkTestString2 : Nat → String → String
|
||||
| 0 s := s
|
||||
| (n+1) s := mkTestString2 n $
|
||||
s ++ "(xyz) /- /- comment2 -/ -/ "
|
||||
++ "(x:y:z) -- comment1 \n"
|
||||
++ "(x:y)"
|
||||
|
||||
def test2 (n : Nat) : IO Unit :=
|
||||
let s := mkTestString2 n "" in
|
||||
test testParser2 s
|
||||
|
||||
def main (xs : List String) : IO Unit :=
|
||||
let s := mkNumPairString xs.head.toNat "" in
|
||||
test testParser s
|
||||
test2 xs.head.toNat
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue