import init.lean.message init.lean.parser.syntax init.lean.parser.trie init.lean.parser.basic init.lean.parser.stringliteral import init.lean.parser.token namespace Lean namespace flatParser open String open Parser (Syntax Syntax.missing) open Parser (Trie TokenMap) abbreviation pos := String.Pos /-- A precomputed cache for quickly mapping Char offsets to positions. -/ structure FileMap := (offsets : Array Nat) (lines : Array Nat) namespace FileMap private def fromStringAux (s : String) : Nat → Nat → Nat → pos → Array Nat → Array Nat → FileMap | 0 offset line i offsets lines := ⟨offsets.push offset, lines.push line⟩ | (k+1) offset line i offsets lines := if s.atEnd i then ⟨offsets.push offset, lines.push line⟩ else let c := s.get i in let i := s.next i in let offset := offset + 1 in if c = '\n' then fromStringAux k offset (line+1) i (offsets.push offset) (lines.push (line+1)) else fromStringAux k offset line i offsets lines def fromString (s : String) : FileMap := fromStringAux s s.length 0 1 0 (Array.empty.push 0) (Array.empty.push 1) /- Remark: `offset is in [(offsets.get b), (offsets.get e)]` and `b < e` -/ private def toPositionAux (offsets : Array Nat) (lines : Array Nat) (offset : Nat) : Nat → Nat → Nat → Position | 0 b e := ⟨offset, 1⟩ -- unreachable | (k+1) b e := let offsetB := offsets.get b in if e = b + 1 then ⟨offset - offsetB, lines.get b⟩ else let m := (b + e) / 2 in let offsetM := offsets.get m in if offset = offsetM then ⟨0, lines.get m⟩ else if offset > offsetM then toPositionAux k m e else toPositionAux k b m def toPosition : FileMap → Nat → Position | ⟨offsets, lines⟩ offset := toPositionAux offsets lines offset offsets.size 0 (offsets.size-1) end FileMap structure TokenConfig := («prefix» : String) (lbp : Nat := 0) structure FrontendConfig := (filename : String) (input : String) (FileMap : FileMap) /- Remark: if we have a Node in the Trie with `some TokenConfig`, the String induced by the path is equal to the `TokenConfig.prefix`. -/ structure ParserConfig extends FrontendConfig := (tokens : Trie TokenConfig) -- Backtrackable State structure ParserState := (messages : MessageLog) structure TokenCacheEntry := (startPos stopPos : pos) (tk : Syntax) -- Non-backtrackable State structure ParserCache := (tokenCache : Option TokenCacheEntry := none) inductive Result (α : Type) | ok (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result | error {} (msg : String) (i : pos) (cache : ParserCache) (stx : Syntax) (eps : Bool) : Result inductive Result.IsOk {α : Type} : Result α → Prop | mk (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result.IsOk (Result.ok a i cache State eps) theorem errorIsNotOk {α : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool} (h : Result.IsOk (@Result.error α msg i cache stx eps)) : False := match h with end @[inline] def unreachableError {α β : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool} (h : Result.IsOk (@Result.error α msg i cache stx eps)) : β := False.elim (errorIsNotOk h) def resultOk := {r : Result Unit // r.IsOk} @[inline] def mkResultOk (i : pos) (cache : ParserCache) (State : ParserState) (eps := true) : resultOk := ⟨Result.ok () i cache State eps, Result.IsOk.mk _ _ _ _ _⟩ def parserCoreM (α : Type) := ParserConfig → resultOk → Result α abbreviation parserCore := parserCoreM Syntax structure recParsers := (cmdParser : parserCore) (termParser : Nat → parserCore) def parserM (α : Type) := recParsers → parserCoreM α abbreviation Parser := parserM Syntax abbreviation trailingParser := Syntax → Parser @[inline] def command.Parser : Parser := λ ps, ps.cmdParser @[inline] def Term.Parser (rbp : Nat := 0) : Parser := λ ps, ps.termParser rbp @[inline] def parserM.pure {α : Type} (a : α) : parserM α := λ _ _ r, match r with | ⟨Result.ok _ it c s _, h⟩ := Result.ok a it c s true | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline_if_reduce] def eagerOr (b₁ b₂ : Bool) := b₁ || b₂ @[inline_if_reduce] def eagerAnd (b₁ b₂ : Bool) := b₁ && b₂ @[inline] def parserM.bind {α β : Type} (x : parserM α) (f : α → parserM β) : parserM β := λ ps cfg r, match x ps cfg r with | Result.ok a i c s e₁ := (match f a ps cfg (mkResultOk i c s) with | Result.ok b i c s e₂ := Result.ok b i c s (eagerAnd e₁ e₂) | Result.error msg i c stx e₂ := Result.error msg i c stx (eagerAnd e₁ e₂)) | Result.error msg i c stx e := Result.error msg i c stx e instance : Monad parserM := {pure := @parserM.pure, bind := @parserM.bind} @[inline] protected def orelse {α : Type} (p q : parserM α) : parserM α := λ ps cfg r, match r with | ⟨Result.ok _ i₁ _ s₁ _, _⟩ := (match p ps cfg r with | Result.error msg₁ i₂ c₂ stx₁ true := q ps cfg (mkResultOk i₁ c₂ s₁) | other := other) | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] protected def failure {α : Type} : parserM α := λ _ _ r, match r with | ⟨Result.ok _ i c s _, h⟩ := Result.error "failure" i c Syntax.missing true | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h instance : Alternative parserM := { orelse := @flatParser.orelse, failure := @flatParser.failure, ..flatParser.Monad } def setSilentError {α : Type} : Result α → Result α | (Result.error i c msg stx _) := Result.error i c msg stx true | other := other /-- `try p` behaves like `p`, but it pretends `p` hasn't consumed any input when `p` fails. -/ @[inline] def try {α : Type} (p : parserM α) : parserM α := λ ps cfg r, setSilentError (p ps cfg r) @[inline] def atEnd (cfg : ParserConfig) (i : pos) : Bool := cfg.input.atEnd i @[inline] def curr (cfg : ParserConfig) (i : pos) : Char := cfg.input.get i @[inline] def next (cfg : ParserConfig) (i : pos) : pos := cfg.input.next i @[inline] def inputSize (cfg : ParserConfig) : Nat := cfg.input.length @[inline] def currPos : resultOk → pos | ⟨Result.ok _ i _ _ _, _⟩ := i | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] def currState : resultOk → ParserState | ⟨Result.ok _ _ _ s _, _⟩ := s | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h def mkError {α : Type} (r : resultOk) (msg : String) (stx : Syntax := Syntax.missing) (eps := true) : Result α := match r with | ⟨Result.ok _ i c s _, _⟩ := Result.error msg i c stx eps | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] def satisfy (p : Char → Bool) : parserM Char := λ _ cfg r, match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then mkError r "end of input" else let c := curr cfg i in if p c then Result.ok c (next cfg i) ch st false else mkError r "unexpected character" | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h def any : parserM Char := satisfy (λ _, true) @[specialize] def takeUntilAux (p : Char → Bool) (cfg : ParserConfig) : Nat → resultOk → Result Unit | 0 r := r.val | (n+1) r := match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then r.val else let c := curr cfg i in if p c then r.val else takeUntilAux n (mkResultOk (next cfg i) ch st true) | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[specialize] def takeUntil (p : Char → Bool) : parserM Unit := λ ps cfg r, takeUntilAux p cfg (inputSize cfg) r def takeUntilNewLine : parserM Unit := takeUntil (= '\n') def whitespace : parserM Unit := takeUntil (λ c, !c.isWhitespace) -- setOption Trace.Compiler.boxed True --- setOption pp.implicit True def strAux (cfg : ParserConfig) (str : String) (error : String) : Nat → resultOk → pos → Result Unit | 0 r j := mkError r error | (n+1) r j := if str.atEnd j then r.val else match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then Result.error error i ch Syntax.missing true else if curr cfg i = str.get j then strAux n (mkResultOk (next cfg i) ch st true) (str.next j) else Result.error error i ch Syntax.missing true | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h -- #exit @[inline] def str (s : String) : parserM Unit := λ ps cfg r, strAux cfg s ("expected " ++ repr s) (inputSize cfg) r 0 @[specialize] def manyAux (p : parserM Unit) : Nat → Bool → parserM Unit | 0 fst := pure () | (k+1) fst := λ ps cfg r, let i₀ := currPos r in let s₀ := currState r in match p ps cfg r with | Result.ok a i c s _ := manyAux k false ps cfg (mkResultOk i c s) | Result.error _ _ c _ _ := Result.ok () i₀ c s₀ fst @[inline] def many (p : parserM Unit) : parserM Unit := λ ps cfg r, manyAux p (inputSize cfg) true ps cfg r @[inline] def many1 (p : parserM Unit) : parserM Unit := p *> many p def dummyParserCore : parserCore := λ cfg r, mkError r "dummy" def testParser {α : Type} (x : parserM α) (input : String) : String := let r := x { cmdParser := dummyParserCore, termParser := λ _, dummyParserCore } { filename := "test", input := input, FileMap := FileMap.fromString input, tokens := Lean.Parser.Trie.empty } (mkResultOk 0 {} {messages := MessageLog.empty}) in match r with | Result.ok _ i _ _ _ := "Ok at " ++ toString i | Result.error msg i _ _ _ := "Error at " ++ toString i ++ ": " ++ msg /- mutual def recCmd, recTerm (parseCmd : Parser) (parseTerm : Nat → Parser) (parseLvl : Nat → parserCore) with recCmd : Nat → parserCore | 0 cfg r := mkError r "Parser: no progress" | (n+1) cfg r := parseCmd ⟨recCmd n, parseLvl, recTerm n⟩ cfg r with recTerm : Nat → Nat → parserCore | 0 rbp cfg r := mkError r "Parser: no progress" | (n+1) rbp cfg r := parseTerm rbp ⟨recCmd n, parseLvl, recTerm n⟩ cfg r -/ /- def runParser (x : Parser) (parseCmd : Parser) (parseLvl : Nat → Parser) (parseTerm : Nat → Parser) (input : Iterator) (cfg : ParserConfig) : Result Syntax := let it := input in let n := it.remaining in let r := mkResultOk it {} {messages := MessageLog.Empty} in let pl := recLvl (parseLvl) n in let ps : recParsers := { cmdParser := recCmd parseCmd parseTerm pl n, lvlParser := pl, termParser := recTerm parseCmd parseTerm pl n } in x ps cfg r -/ structure parsingTables := (leadingTermParsers : TokenMap Parser) (trailingTermParsers : TokenMap trailingParser) abbreviation CommandParserM (α : Type) := parsingTables → parserM α end flatParser end Lean section open Lean.flatParser def flatP : parserM Unit := many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ())) end section open Lean.Parser open Lean.Parser.MonadParsec @[reducible] def Parser (α : Type) : Type := ReaderT Lean.flatParser.recParsers (ReaderT Lean.flatParser.ParserConfig (ParsecT Syntax (StateT ParserCache Id))) α def testParsec (p : Parser Unit) (input : String) : String := let ps : Lean.flatParser.recParsers := { cmdParser := Lean.flatParser.dummyParserCore, termParser := λ _, Lean.flatParser.dummyParserCore } in let cfg : Lean.flatParser.ParserConfig := { filename := "test", input := input, FileMap := Lean.flatParser.FileMap.fromString input, tokens := Lean.Parser.Trie.empty } in let r := p ps cfg input.mkOldIterator {} in match r with | (Parsec.Result.ok _ it _, _) := "OK at " ++ toString it.offset | (Parsec.Result.error msg _, _) := "Error " ++ msg.toString @[inline] def str' (s : String) : Parser Unit := str s *> pure () def parsecP : Parser Unit := many1' (str' "++" <|> str' "**" <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ())) def parsecP2 : Parser Unit := many1' ((parseStringLiteral *> whitespace *> pure ()) <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ())) end namespace BasicParser open Lean.Parser open Lean.Parser.MonadParsec def testBasicParser (p : BasicParserM Unit) (input : String) : String := let cfg : Lean.Parser.ParserConfig := { filename := "test", input := input, fileMap := { lines := {} }, tokens := Lean.Parser.Trie.empty } in let r := p cfg input.mkOldIterator {} in match r with | (Parsec.Result.ok _ it _, _) := "OK at " ++ toString it.offset | (Parsec.Result.error msg _, _) := "Error " ++ msg.toString @[inline] def str' (s : String) : BasicParserM Unit := str s *> pure () def parserP : BasicParserM Unit := many1' (str' "++" <|> str' "**" <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ())) def parser2 : BasicParserM Unit := many1' ((parseStringLiteral *> Lean.Parser.MonadParsec.whitespace *> pure ()) <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ())) def parser3 : BasicParserM Unit := Lean.Parser.whitespace end BasicParser def mkBigString : Nat → String → String | 0 s := s | (n+1) s := mkBigString n (s ++ "-- new comment\n") def mkBigString2 : Nat → String → String | 0 s := s | (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n") def mkBigString3 : Nat → String → String | 0 s := s | (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ") @[noinline] def testFlatP (s : String) : IO Unit := IO.println (Lean.flatParser.testParser flatP s) @[noinline] def testParsecP (p : Parser Unit) (s : String) : IO Unit := IO.println (testParsec p s) @[noinline] def testBasicParser (p : Lean.Parser.BasicParserM Unit) (s : String) : IO Unit := IO.println (BasicParser.testBasicParser p s) @[noinline] def prof {α : Type} (msg : String) (p : IO α) : IO α := let msg₁ := "Time for '" ++ msg ++ "':" in let msg₂ := "Memory usage for '" ++ msg ++ "':" in allocprof msg₂ (timeit msg₁ p) def main (xs : List String) : IO Unit := -- let s₁ := mkBigString xs.head.toNat "" in -- let s₂ := s₁ ++ "bad" ++ mkBigString 20 "" in -- let s₃ := mkBigString2 xs.head.toNat "" in let s₄ := mkBigString3 xs.head.toNat "" in -- prof "flat Parser 1" (testFlatP s₁) *> -- prof "flat Parser 2" (testFlatP s₂) *> -- prof "Parsec 1" (testParsecP parsecP s₁) *> -- prof "Parsec 2" (testParsecP parsecP s₂) *> -- prof "Parsec 3" (testParsecP parsecP2 s₃) *> prof "Basic parser 1" (testBasicParser BasicParser.parser3 s₄)