import init.Lean.Message init.Lean.Parser.Syntax init.Lean.Parser.Trie init.Lean.Parser.basic import init.Lean.Parser.token namespace Lean namespace flatParser open String open Parser (Syntax Syntax.missing Syntax.atom Syntax.ident Syntax.rawNode number stringLit) open Parser (Trie TokenMap) def maxPrec : Nat := 1024 abbrev pos := String.utf8Pos /-- A precomputed cache for quickly mapping Char offsets to positions. -/ structure FileMap := (offsets : Array Nat) (lines : Array Nat) namespace FileMap private def fromStringAux (s : String) : Nat → Nat → Nat → pos → Array Nat → Array Nat → FileMap | 0 offset line i offsets lines := ⟨offsets.push offset, lines.push line⟩ | (k+1) offset line i offsets lines := if s.utf8AtEnd i then ⟨offsets.push offset, lines.push line⟩ else let c := s.utf8Get i in let i := s.utf8Next i in let offset := offset + 1 in if c = '\n' then fromStringAux k offset (line+1) i (offsets.push offset) (lines.push (line+1)) else fromStringAux k offset line i offsets lines def fromString (s : String) : FileMap := fromStringAux s s.length 0 1 0 (Array.nil.push 0) (Array.nil.push 1) /- Remark: `offset is in [(offsets.get b), (offsets.get e)]` and `b < e` -/ private def toPositionAux (offsets : Array Nat) (lines : Array Nat) (offset : Nat) : Nat → Nat → Nat → Position | 0 b e := ⟨offset, 1⟩ -- unreachable | (k+1) b e := let offsetB := offsets.read' b in if e = b + 1 then ⟨offset - offsetB, lines.read' b⟩ else let m := (b + e) / 2 in let offsetM := offsets.read' m in if offset = offsetM then ⟨0, lines.read' m⟩ else if offset > offsetM then toPositionAux k m e else toPositionAux k b m def toPosition : FileMap → Nat → Position | ⟨offsets, lines⟩ offset := toPositionAux offsets lines offset offsets.size 0 (offsets.size-1) end FileMap structure TokenConfig := («prefix» : String) (lbp : Nat := 0) structure FrontendConfig := (filename : String) (input : String) (FileMap : FileMap) /- Remark: if we have a Node in the Trie with `some TokenConfig`, the String induced by the path is equal to the `TokenConfig.prefix`. -/ structure ParserConfig extends FrontendConfig := (tokens : Trie TokenConfig) -- Backtrackable State structure ParserState := (messages : MessageLog) structure TokenCacheEntry := (startPos stopPos : pos) (tk : Syntax) -- Non-backtrackable State structure ParserCache := (tokenCache : Option TokenCacheEntry := none) inductive Result (α : Type) | ok (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result | error {} (msg : String) (i : pos) (cache : ParserCache) (stx : Syntax) (eps : Bool) : Result inductive Result.IsOk {α : Type} : Result α → Prop | mk (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result.IsOk (Result.ok a i cache State eps) theorem errorIsNotOk {α : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool} (h : Result.IsOk (@Result.error α msg i cache stx eps)) : False := match h with end @[inline] def unreachableError {α β : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool} (h : Result.IsOk (@Result.error α msg i cache stx eps)) : β := False.elim (errorIsNotOk h) def resultOk := {r : Result Unit // r.IsOk} @[inline] def mkResultOk (i : pos) (cache : ParserCache) (State : ParserState) (eps := tt) : resultOk := ⟨Result.ok () i cache State eps, Result.IsOk.mk _ _ _ _ _⟩ def mkError {α : Type} (r : resultOk) (msg : String) (stx : Syntax := Syntax.missing) (eps := tt) : Result α := match r with | ⟨Result.ok _ i c s _, _⟩ := Result.error msg i c stx eps | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h def parserCoreM (α : Type) := resultOk → Result α abbrev parserCore := parserCoreM Syntax @[inline] def parserCoreM.pure {α : Type} (a : α) : parserCoreM α := λ r, match r with | ⟨Result.ok _ it c s _, h⟩ := Result.ok a it c s tt | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inlineIfReduce] def strictOr (b₁ b₂ : Bool) := b₁ || b₂ @[inlineIfReduce] def strictAnd (b₁ b₂ : Bool) := b₁ && b₂ @[inline] def parserCoreM.bind {α β : Type} (x : parserCoreM α) (f : α → parserCoreM β) : parserCoreM β := λ r, match x r with | Result.ok a i c s e₁ := (match f a (mkResultOk i c s) with | Result.ok b i c s e₂ := Result.ok b i c s (strictAnd e₁ e₂) | Result.error msg i c stx e₂ := Result.error msg i c stx (strictAnd e₁ e₂)) | Result.error msg i c stx e := Result.error msg i c stx e instance : Monad parserCoreM := {bind := @parserCoreM.bind, pure := @parserCoreM.pure} instance : Inhabited parserCore := ⟨λ r, mkError r "error"⟩ @[inline] def parserCoreM.error {α : Type} (msg : String) : parserCoreM α := λ r, mkError r msg @[inline] def error {α : Type} {m : Type → Type} [HasMonadLiftT parserCoreM m] (msg : String) : m α := monadLift $ parserCoreM.error msg abbrev BasicParserM : Type → Type := ReaderT ParserConfig parserCoreM abbrev basicParser : Type := BasicParserM Syntax abbrev CommandParserM (ρ : Type) : Type → Type := ReaderT ρ (ReaderT parserCore parserCoreM) abbrev TermParserM : Type → Type := ReaderT (Nat → parserCore) (CommandParserM ParserConfig) abbrev termParser : Type := TermParserM Syntax abbrev trailingTermParser : Type := Syntax → termParser structure CommandParserConfig extends ParserConfig := (leadingTermParsers : TokenMap termParser) (trailingTermParsers : TokenMap trailingTermParser) abbrev commandParser : Type := CommandParserM CommandParserConfig Syntax abbrev commandParserCore : Type := CommandParserM ParserConfig Syntax @[inline] def termParserOfBasicParser {α : Type} (p : BasicParserM α) : TermParserM α := λ _ cfg _, p cfg @[inline] def commandParserOfBasicParser {α : Type} (p : BasicParserM α) : CommandParserM CommandParserConfig α := λ cfg _, p cfg.toParserConfig instance basic2termP : HasMonadLift BasicParserM TermParserM := ⟨@termParserOfBasicParser⟩ instance basic2commandP : HasMonadLift BasicParserM (CommandParserM CommandParserConfig) := ⟨@commandParserOfBasicParser⟩ @[inline] def Term.Parser (rbp := 0) : termParser := λ p _ _, p rbp @[inline] def command.Parser : commandParser := λ _ p, p @[inline] def readCfg : TermParserM ParserConfig := λ _ cfg _, pure cfg def peekToken : BasicParserM Syntax := error "TODO" def currLbp : TermParserM Nat := do tk ← monadLift peekToken, match tk with | Syntax.atom ⟨_, sym⟩ := do cfg ← readCfg, (match cfg.tokens.matchPrefix sym.mkIterator with | some ⟨_, tkCfg⟩ := pure tkCfg.lbp | _ := error "currLbp: unreachable") | Syntax.rawNode {kind := @number, ..} := pure maxPrec | Syntax.rawNode {kind := @stringLit, ..} := pure maxPrec | Syntax.ident _ := pure maxPrec | _ := error "currLbp: unknown token kind" #exit match tk with | Syntax.atom ⟨_, sym⟩ := do cfg ← read, -- some ⟨_, tkCfg⟩ ← pure (cfg.tokens.matchPrefix sym.mkIterator) | error "currLbp: unreachable", pure 0 | Syntax.ident _ := pure maxPrec | Syntax.rawNode {kind := @number, ..} := pure maxPrec | Syntax.rawNode {kind := @stringLit, ..} := pure maxPrec | _ := error "currLbp: unknown token kind" private def trailing (cfg : CommandParserConfig) : trailingTermParser := λ _ p _ _ r, p 0 r -- TODO(Leo) private def leading (cfg : CommandParserConfig) : termParser := λ p _ _ r, p 0 r -- TODO(Leo) def dummy : Nat → parserCore := λ _ r, mkError r "dummy" def pratt (leadingP : termParser) (trailingP : trailingTermParser) (p : termParser) : commandParserCore := p dummy def commandParserOfTermParser (p : termParser) : commandParser := λ cfg rec r, let leadingP : termParser := leading cfg in let trailingP : trailingTermParser := trailing cfg in let cfg : ParserConfig := cfg.toParserConfig in let p : commandParserCore := pratt leadingP trailingP p in p cfg rec r #exit def prattParser (cfg : CommandParserConfig) : termParser := leading @[inline] def toParserCore (termP : Nat → Parser) (cmdP : parserCore) : Nat → parserCore := fix (λ recF rbp cfg r, termP rbp cmdP recF cfg r) @[inline] def Parser.run (x : Parser) (termP : Nat → Parser) (cmdP : parserCore) : parserCore := x cmdP (toParserCore termP cmdP) -- STOPPED HERE #exit def parserCore.run (cmdP : parserCore) (termP : parserCore) : parserCore := def aux (f : Nat → parserCore) : Nat → parserCore structure CommandParserConfig extends recParserConfig := (leadingTermParsers : TokenMap Parser) (trailingTermParsers : TokenMap trailingParser) abbrev CommandParserM (α : Type) : Type := parserCoreM CommandParserConfig α abbrev commandParser := CommandParserM Syntax #exit -- abbrev -- def parserM (α : Type) := recParsers → parserCoreM α abbreviation Parser := parserM Syntax abbreviation trailingParser := Syntax → Parser @[inline] def command.Parser : Parser := λ cfg, cfg.cmdParser cfg @[inline] def Term.Parser (rbp : Nat := 0) : Parser := λ ps, ps.termParser rbp instance : Monad parserM := {pure := @parserM.pure, bind := @parserM.bind} @[inline] protected def orelse {α : Type} (p q : parserM α) : parserM α := λ ps cfg r, match r with | ⟨Result.ok _ i₁ _ s₁ _, _⟩ := (match p ps cfg r with | Result.error msg₁ i₂ c₂ stx₁ tt := q ps cfg (mkResultOk i₁ c₂ s₁) | other := other) | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] protected def failure {α : Type} : parserM α := λ _ _ r, match r with | ⟨Result.ok _ i c s _, h⟩ := Result.error "failure" i c Syntax.missing tt | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h instance : Alternative parserM := { orelse := @flatParser.orelse, failure := @flatParser.failure, ..flatParser.Monad } def setSilentError {α : Type} : Result α → Result α | (Result.error i c msg stx _) := Result.error i c msg stx tt | other := other /-- `try p` behaves like `p`, but it pretends `p` hasn't consumed any input when `p` fails. -/ @[inline] def try {α : Type} (p : parserM α) : parserM α := λ ps cfg r, setSilentError (p ps cfg r) @[inline] def atEnd (cfg : ParserConfig) (i : pos) : Bool := cfg.input.utf8AtEnd i @[inline] def curr (cfg : ParserConfig) (i : pos) : Char := cfg.input.utf8Get i @[inline] def next (cfg : ParserConfig) (i : pos) : pos := cfg.input.utf8Next i @[inline] def inputSize (cfg : ParserConfig) : Nat := cfg.input.length @[inline] def currPos : resultOk → pos | ⟨Result.ok _ i _ _ _, _⟩ := i | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] def currState : resultOk → ParserState | ⟨Result.ok _ _ _ s _, _⟩ := s | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[inline] def satisfy (p : Char → Bool) : parserM Char := λ _ cfg r, match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then mkError r "end of input" else let c := curr cfg i in if p c then Result.ok c (next cfg i) ch st ff else mkError r "unexpected character" | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h def any : parserM Char := satisfy (λ _, tt) @[specialize] def takeUntilAux (p : Char → Bool) (cfg : ParserConfig) : Nat → resultOk → Result Unit | 0 r := r.val | (n+1) r := match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then r.val else let c := curr cfg i in if p c then r.val else takeUntilAux n (mkResultOk (next cfg i) ch st tt) | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h @[specialize] def takeUntil (p : Char → Bool) : parserM Unit := λ ps cfg r, takeUntilAux p cfg (inputSize cfg) r def takeUntilNewLine : parserM Unit := takeUntil (= '\n') def whitespace : parserM Unit := takeUntil (λ c, !c.isWhitespace) -- setOption Trace.Compiler.boxed True --- setOption pp.implicit True def strAux (cfg : ParserConfig) (str : String) (error : String) : Nat → resultOk → pos → Result Unit | 0 r j := mkError r error | (n+1) r j := if str.utf8AtEnd j then r.val else match r with | ⟨Result.ok _ i ch st e, _⟩ := if atEnd cfg i then Result.error error i ch Syntax.missing tt else if curr cfg i = str.utf8Get j then strAux n (mkResultOk (next cfg i) ch st tt) (str.utf8Next j) else Result.error error i ch Syntax.missing tt | ⟨Result.error _ _ _ _ _, h⟩ := unreachableError h -- #exit @[inline] def str (s : String) : parserM Unit := λ ps cfg r, strAux cfg s ("expected " ++ repr s) (inputSize cfg) r 0 @[specialize] def manyAux (p : parserM Unit) : Nat → Bool → parserM Unit | 0 fst := pure () | (k+1) fst := λ ps cfg r, let i₀ := currPos r in let s₀ := currState r in match p ps cfg r with | Result.ok a i c s _ := manyAux k ff ps cfg (mkResultOk i c s) | Result.error _ _ c _ _ := Result.ok () i₀ c s₀ fst @[inline] def many (p : parserM Unit) : parserM Unit := λ ps cfg r, manyAux p (inputSize cfg) tt ps cfg r @[inline] def many1 (p : parserM Unit) : parserM Unit := p *> many p def dummyParserCore : parserCore := λ cfg r, mkError r "dummy" def testParser {α : Type} (x : parserM α) (input : String) : String := let r := x { cmdParser := dummyParserCore, termParser := λ _, dummyParserCore } { filename := "test", input := input, FileMap := FileMap.fromString input, tokens := Lean.Parser.Trie.mk } (mkResultOk 0 {} {messages := MessageLog.Empty}) in match r with | Result.ok _ i _ _ _ := "Ok at " ++ toString i | Result.error msg i _ _ _ := "Error at " ++ toString i ++ ": " ++ msg /- mutual def recCmd, recTerm (parseCmd : Parser) (parseTerm : Nat → Parser) (parseLvl : Nat → parserCore) with recCmd : Nat → parserCore | 0 cfg r := mkError r "Parser: no progress" | (n+1) cfg r := parseCmd ⟨recCmd n, parseLvl, recTerm n⟩ cfg r with recTerm : Nat → Nat → parserCore | 0 rbp cfg r := mkError r "Parser: no progress" | (n+1) rbp cfg r := parseTerm rbp ⟨recCmd n, parseLvl, recTerm n⟩ cfg r -/ /- def runParser (x : Parser) (parseCmd : Parser) (parseLvl : Nat → Parser) (parseTerm : Nat → Parser) (input : Iterator) (cfg : ParserConfig) : Result Syntax := let it := input in let n := it.remaining in let r := mkResultOk it {} {messages := MessageLog.Empty} in let pl := recLvl (parseLvl) n in let ps : recParsers := { cmdParser := recCmd parseCmd parseTerm pl n, lvlParser := pl, termParser := recTerm parseCmd parseTerm pl n } in x ps cfg r -/ structure parsingTables := (leadingTermParsers : TokenMap Parser) (trailingTermParsers : TokenMap trailingParser) abbreviation CommandParserM (α : Type) := parsingTables → parserM α end flatParser end Lean def mkBigString : Nat → String → String | 0 s := s | (n+1) s := mkBigString n (s ++ "-- new comment\n") section open Lean.flatParser def flatP : parserM Unit := many1 (str "--" *> takeUntil (= '\n') *> any *> pure ()) end section open Lean.Parser open Lean.Parser.MonadParsec @[reducible] def Parser (α : Type) : Type := ReaderT Lean.flatParser.recParsers (ReaderT Lean.flatParser.ParserConfig (ParsecT Syntax (StateT ParserCache id))) α def testParsec (p : Parser Unit) (input : String) : String := let ps : Lean.flatParser.recParsers := { cmdParser := Lean.flatParser.dummyParserCore, termParser := λ _, Lean.flatParser.dummyParserCore } in let cfg : Lean.flatParser.ParserConfig := { filename := "test", input := input, FileMap := Lean.flatParser.FileMap.fromString input, tokens := Lean.Parser.Trie.mk } in let r := p ps cfg input.mkIterator {} in match r with | (Parsec.Result.ok _ it _, _) := "OK at " ++ toString it.offset | (Parsec.Result.error msg _, _) := "Error " ++ msg.toString def parsecP : Parser Unit := many1' (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ()) end @[noinline] def testFlatP (s : String) : IO Unit := IO.println (Lean.flatParser.testParser flatP s) @[noinline] def testParsecP (s : String) : IO Unit := IO.println (testParsec parsecP s) def prof {α : Type} (msg : String) (p : IO α) : IO α := let msg₁ := "Time for '" ++ msg ++ "':" in let msg₂ := "Memory usage for '" ++ msg ++ "':" in allocprof msg₂ (timeit msg₁ p) def main (xs : List String) : IO UInt32 := let s₁ := mkBigString xs.head.toNat "" in let s₂ := s₁ ++ "bad" ++ mkBigString 20 "" in prof "flat Parser 1" (testFlatP s₁) *> prof "flat Parser 2" (testFlatP s₂) *> -- prof "Parsec 1" (testParsecP s₁) *> -- prof "Parsec 2" (testParsecP s₂) *> pure 0