test(tests/playground/parser): add longestMatch and other helper functions

This commit is contained in:
Leonardo de Moura 2019-04-23 17:29:38 -07:00
parent 3d8c3d5789
commit 5188adc685
3 changed files with 201 additions and 18 deletions

View file

@ -79,6 +79,10 @@ def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
match d with
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
def ParserData.shrinkStack (d : ParserData) (iniStackSz : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, msg⟩ := ⟨stack.shrink iniStackSz, pos, cache, msg⟩
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
d.setPos (s.next pos)
@ -604,16 +608,21 @@ abbrev BasicParser : Type := AbsParser BasicParserFn
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
abbrev TermParser : Type := AbsParser TermParserFn
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
abbrev TrailingTermParserFn : Type := EnvParserFn Syntax TermParserFn
abbrev TrailingTermParser : Type := AbsParser TrailingTermParserFn
structure TermParsingTables :=
(leadingTermParsers : TokenMap TermParserFn)
(trailingTermParsers : TokenMap TrailingTermParserFn)
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
(localLeadingTermParsers : TokenMap TermParserFn := RBMap.empty)
(localTrailingTermParsers : TokenMap TrailingTermParserFn := RBMap.empty)
structure CommandParserConfig extends ParserConfig :=
(leadingTermParsers : TokenMap TermParser)
(trailingTermParsers : TokenMap TrailingTermParser)
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
(localLeadingTermParsers : TokenMap TermParser := RBMap.empty)
(localTrailingTermParsers : TokenMap TrailingTermParser := RBMap.empty)
(pTables : TermParsingTables)
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
abbrev CommandParserFn : Type := CmdParserFn CommandParserConfig
abbrev CommandParser : Type := AbsParser CommandParserFn
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
{ fn := RecParserFn.recurse rbp }
@ -735,6 +744,17 @@ def strLitFn : BasicParserFn
@[inline] def strLit : BasicParser :=
{ fn := numberFn }
def identFn : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError || !(d.stxStack.back.isIdent) then
d.mkError "expected identifier"
else
d
@[inline] def ident : BasicParser :=
{ fn := identFn }
instance string2basic : HasCoe String BasicParser :=
⟨symbol⟩
@ -754,5 +774,119 @@ if d.hasError then
else
Except.ok d.stxStack.back
-- Helper function for testing (non-recursive) term parsers
def TermParser.test (p : TermParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
let frontendCfg := mkFrontendConfig filename input in
let tokens := p.info.updateTokens {} in
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
let dummyCmdParser : Unit → ParserFn := λ _ _ d, d.mkError "no command parser" in
let dummyTermParser : → CmdParserFn ParserConfig := λ _ _ _ _ d, d.mkError "no term parser" in
let d := p.fn dummyTermParser cfg dummyCmdParser input d in
if d.hasError then
Except.error (d.toErrorMsg cfg)
else
Except.ok d.stxStack.back
def ParserData.keepNewError (d : ParserData) (oldStackSize : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, err⟩ := ⟨stack.shrink oldStackSize, pos, cache, err⟩
def ParserData.keepPrevError (d : ParserData) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option String) : ParserData :=
match d with
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink oldStackSize, oldStopPos, cache, oldError⟩
def ParserData.mergeErrors (d : ParserData) (oldStackSize : Nat) (oldError : String) : ParserData :=
match d with
| ⟨stack, pos, cache, some err⟩ := ⟨stack.shrink oldStackSize, pos, cache, some (err ++ "; " ++ oldError)⟩
| other := other
def ParserData.mkLongestNodeAlt (d : ParserData) (startSize : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, _⟩ :=
if stack.size == startSize then ⟨stack.push Syntax.missing, pos, cache, none⟩ -- parser did not create any node, then we just add `Syntax.missing`
else if stack.size == startSize + 1 then d
else
-- parser created more than one node, combine them into a single node
let node := Syntax.node nullKind (stack.extract startSize stack.size) [] in
let stack := stack.shrink startSize in
⟨stack.push node, pos, cache, none⟩
def ParserData.keepLatest (d : ParserData) (startStackSize : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, _⟩ :=
let node := stack.back in
let stack := stack.shrink startStackSize in
let stack := stack.push node in
⟨stack, pos, cache, none⟩
def ParserData.replaceLongest (d : ParserData) (startStackSize : Nat) (prevStackSize : Nat) : ParserData :=
let d := d.mkLongestNodeAlt prevStackSize in
d.keepLatest startStackSize
def longestMatchFnAux (startSize : Nat) (startPos : String.Pos) : List TermParserFn → TermParserFn
| [] := λ _ _ _ _ d, if !d.hasError && d.stackSize > startSize + 1 then d.mkNode choiceKind startSize else d
| (p::ps) := λ tp cfg cp s d,
let prevErrorMsg := d.errorMsg in
let prevStopPos := d.pos in
let prevSize := d.stackSize in
let d := d.restore prevSize startPos in
let d := p tp cfg cp s d in
match prevErrorMsg, d.errorMsg with
| none, none := -- both succeeded
let d :=
if d.pos > prevStopPos then d.replaceLongest startSize prevSize -- replace
else if d.pos < prevStopPos then d.restore prevSize prevStopPos -- keep prev
else d.mkLongestNodeAlt prevSize in -- keep both
longestMatchFnAux ps tp cfg cp s d
| none, some _ := -- prev succeeded, current failed
let d := d.restore prevSize prevStopPos in
longestMatchFnAux ps tp cfg cp s d
| some oldError, some _ := -- both failed
let d :=
if d.pos > prevStopPos then d.keepNewError prevSize
else if d.pos < prevStopPos then d.keepPrevError prevSize prevStopPos prevErrorMsg
else d.mergeErrors prevSize oldError in
longestMatchFnAux ps tp cfg cp s d
| some _, none := -- prev failed, current succeeded
let d := d.mkLongestNodeAlt startSize in
longestMatchFnAux ps tp cfg cp s d
def longestMatchFn : List TermParserFn → TermParserFn
| [] := λ _ _ _ _ d, d.mkError "longest match: empty list"
| [p] := λ tp cfg cp s d,
let startSize := d.stackSize in
let d := p tp cfg cp s d in
if d.hasError then d else d.mkLongestNodeAlt startSize
| (p::ps) := λ tp cfg cp s d,
let startSize := d.stackSize in
let startPos := d.pos in
let d := p tp cfg cp s d in
if d.hasError then
let d := d.shrinkStack startSize in
longestMatchFnAux startSize startPos ps tp cfg cp s d
else
let d := d.mkLongestNodeAlt startSize in
longestMatchFnAux startSize startPos ps tp cfg cp s d
-- Helper function for testing longestMatchFn, we don't use this function directly
def longestMatch (ps : List TermParser) : TermParser :=
{ info := { updateTokens := λ trie, ps.foldl (λ trie p, p.info.updateTokens trie) trie,
firstTokens := ps.foldl (λ tks p, p.info.firstTokens ++ tks) [] },
fn := longestMatchFn (ps.map (λ p, p.fn)) }
-- Stopped here
@[noinline] def termPrattParser (tbl : TermParsingTables) (rbp : Nat) : TermParserFn :=
λ g, g 0 -- TODO(Leo)
@[specialize] def TermParserFn.run (p : TermParserFn) : CommandParserFn
| cfg r s d :=
let p := RecParserFn.run p (termPrattParser cfg.pTables) in
p cfg.toParserConfig r s d
def parseExpr (rbp : Nat) : CommandParserFn :=
TermParserFn.run (RecParserFn.recurse rbp)
end Parser
-- end Lean

View file

@ -131,6 +131,10 @@ def MacroScopes.flip : MacroScopes → MacroScopes → MacroScopes
| [] := [x]
namespace Syntax
def isIdent : Syntax → Bool
| (Syntax.ident _ _ _ _ _) := true
| _ := false
def isOfKind : Syntax → SyntaxNodeKind → Bool
| (Syntax.node kind _ _) k := k == kind
| other _ := false

View file

@ -9,32 +9,77 @@ def mkNumPairKind : IO SyntaxNodeKind := nextKind `numPair
def mkNumSetKind : IO SyntaxNodeKind := nextKind `numSet
@[init mkNumSetKind] constant numSetKind : SyntaxNodeKind := default _
def mkParenIdentKind : IO SyntaxNodeKind := nextKind `parenIdent
@[init mkParenIdentKind] constant parenIdentKind : SyntaxNodeKind := default _
def mkQualifiedKind : IO SyntaxNodeKind := nextKind `qualified
@[init mkQualifiedKind] constant qualifiedKind : SyntaxNodeKind := default _
def mkSeqKind : IO SyntaxNodeKind := nextKind `seq
@[init mkSeqKind] constant seqKind : SyntaxNodeKind := default _
def mkSetCompKind : IO SyntaxNodeKind := nextKind `setComp
@[init mkSetCompKind] constant setCompKind : SyntaxNodeKind := default _
-- set_option pp.implicit true
-- set_option trace.compiler.boxed true
def numPair : BasicParser :=
def numPairP : TermParser :=
node numPairKind $
"("; number; ","; number; ")"
def numSet : BasicParser :=
def numSetP : TermParser :=
node numSetKind $
"{"; sepBy number ","; "}"
def testParser : BasicParser :=
many (numPair || numSet)
def testParser1 : TermParser :=
many (numPairP || numSetP)
@[noinline] def test (p : BasicParser) (s : String) : IO Unit :=
match p.run s with
def parenIdentP : TermParser :=
node parenIdentKind $
"("; ident; ")"
def qualifiedP : TermParser :=
node qualifiedKind $
"("; ident; ":"; ident; ")"
def seqP : TermParser :=
node seqKind $
"("; sepBy ident ":"; ")"
def setCompP : TermParser :=
node setCompKind $
"{"; ident; ":"; ident; "|"; ident; "}"
def testParser2 : TermParser :=
many (longestMatch [qualifiedP, parenIdentP, seqP, setCompP])
@[noinline] def test (p : TermParser) (s : String) : IO Unit :=
match p.test s with
| Except.error msg := IO.println msg
| Except.ok stx := IO.println "ok" -- IO.println stx
| Except.ok stx := IO.println stx -- IO.println "ok"
def mkNumPairString : Nat → String → String
def mkTestString1 : Nat → String → String
| 0 s := s
| (n+1) s := mkNumPairString n $
| (n+1) s := mkTestString1 n $
s ++ "{} /- /- comment2 -/ -/ "
++ "{" ++ toString n ++ "," ++ toString (n+1) ++ ", " ++ toString (n+2) ++ "}"
++ "(" ++ toString n ++ ", " ++ toString n ++ ") -- comment\n"
def test1 (n : Nat) : IO Unit :=
let s := mkTestString1 n "" in
test testParser1 s
def mkTestString2 : Nat → String → String
| 0 s := s
| (n+1) s := mkTestString2 n $
s ++ "(xyz) /- /- comment2 -/ -/ "
++ "(x:y:z) -- comment1 \n"
++ "(x:y)"
def test2 (n : Nat) : IO Unit :=
let s := mkTestString2 n "" in
test testParser2 s
def main (xs : List String) : IO Unit :=
let s := mkNumPairString xs.head.toNat "" in
test testParser s
test2 xs.head.toNat