diff --git a/src/Lean/Data/Trie.lean b/src/Lean/Data/Trie.lean index 7680c07f14..cbd8fbe965 100644 --- a/src/Lean/Data/Trie.lean +++ b/src/Lean/Data/Trie.lean @@ -166,14 +166,16 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0 /-- Find the longest _key_ in the trie that is contained in the given string `s` at position `i`, and return the associated value. -/ -partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α := +partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) + (endByte := s.utf8ByteSize) + (endByte_valid : endByte ≤ s.utf8ByteSize := by simp) : Option α := let rec loop | leaf v, _, res => if v.isSome then v else res | node1 v c' t', i, res => let res := if v.isSome then v else res - if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte i h + if h : i < endByte then + let c := s.getUtf8Byte i (by omega) if c == c' then loop t' (i + 1) res else res @@ -181,8 +183,8 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α res | node v cs ts, i, res => let res := if v.isSome then v else res - if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte i h + if h : i < endByte then + let c := s.getUtf8Byte i (by omega) match cs.findIdx? (· == c) with | none => res | some idx => loop ts[idx]! (i + 1) res diff --git a/src/Lean/Language/Lean.lean b/src/Lean/Language/Lean.lean index a88337274e..2ad200381c 100644 --- a/src/Lean/Language/Lean.lean +++ b/src/Lean/Language/Lean.lean @@ -260,7 +260,7 @@ ruled out. def LeanProcessingM.run (act : LeanProcessingM α) (oldInputCtx? : Option InputContext) : ProcessingM α := do -- compute position of syntactic change once - let firstDiffPos? := oldInputCtx?.map (·.input.firstDiffPos (← read).input) + let firstDiffPos? := oldInputCtx?.map (·.inputString.firstDiffPos (← read).inputString) ReaderT.adapt ({ · with firstDiffPos? }) act /-- @@ -392,7 +392,7 @@ where -- they are passed separately from `old` if let some oldSuccess := old.result? then -- make sure to update ranges of all reused tasks - let progressRange? := .some ⟨newParserState.pos, ctx.input.endPos⟩ + let progressRange? := .some ⟨newParserState.pos, ctx.endPos⟩ return { ictx stx := newStx @@ -478,7 +478,7 @@ where processHeader (stx : HeaderSyntax) (parserState : Parser.ModuleParserState) : LeanProcessingM (SnapshotTask HeaderProcessedSnapshot) := do let ctx ← read - SnapshotTask.ofIO none none (.some ⟨0, ctx.input.endPos⟩) <| + SnapshotTask.ofIO none none (.some ⟨0, ctx.endPos⟩) <| ReaderT.run (r := ctx) <| -- re-enter reader in new task withHeaderExceptions (α := HeaderProcessedSnapshot) ({ · with result? := none, metaSnap := default }) do let setup ← match (← setupImports stx) with @@ -569,7 +569,7 @@ where parseCmd oldNext newParserState oldResult.cmdState newProm sync cancelTk ctx prom.resolve <| { old with nextCmdSnap? := some { stx? := none - reportingRange := .some ⟨newParserState.pos, ctx.input.endPos⟩ + reportingRange := .some ⟨newParserState.pos, ctx.endPos⟩ task := newProm.result! cancelTk? := cancelTk } } @@ -651,7 +651,7 @@ where else some <$> IO.Promise.new let nextCmdSnap? := next?.map ({ stx? := none - reportingRange := .some ⟨parserState.pos, ctx.input.endPos⟩ + reportingRange := .some ⟨parserState.pos, ctx.endPos⟩ cancelTk? := parseCancelTk task := ·.result! }) diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 0833915c49..c780f38454 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -143,7 +143,7 @@ def errorAtSavedPosFn (msg : String) (delta : Bool) : ParserFn := fun c s => match c.savedPos? with | none => s | some pos => - let pos := if delta then c.input.next pos else pos + let pos := if delta then c.next pos else pos s.mkUnexpectedErrorAt msg pos /-- Generate an error at the position saved with the `withPosition` combinator. @@ -510,15 +510,15 @@ def many1Unbox (p : Parser) : Parser := partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn := fun c s => let i := s.pos - if h : c.input.atEnd i then s.mkEOIError - else if p (c.input.get' i h) then s.next' c.input i h + if h : c.atEnd i then s.mkEOIError + else if p (c.get' i h) then s.next' c i h else s.mkUnexpectedError errorMsg partial def takeUntilFn (p : Char → Bool) : ParserFn := fun c s => let i := s.pos - if h : c.input.atEnd i then s - else if p (c.input.get' i h) then s - else takeUntilFn p c (s.next' c.input i h) + if h : c.atEnd i then s + else if p (c.get' i h) then s + else takeUntilFn p c (s.next' c i h) def takeWhileFn (p : Char → Bool) : ParserFn := takeUntilFn (fun c => !p c) @@ -528,26 +528,25 @@ def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn := variable (pushMissingOnError : Bool) in partial def finishCommentBlock (nesting : Nat) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then eoi s + let i := s.pos + if h : c.atEnd i then eoi s else - let curr := input.get' i h - let i := input.next' i h + let curr := c.get' i h + let i := c.next' i h if curr == '-' then - if h : input.atEnd i then eoi s + if h : c.atEnd i then eoi s else - let curr := input.get' i h + let curr := c.get' i h if curr == '/' then -- "-/" end of comment - if nesting == 1 then s.next' input i h - else finishCommentBlock (nesting-1) c (s.next' input i h) + if nesting == 1 then s.next' c i h + else finishCommentBlock (nesting-1) c (s.next' c i h) else finishCommentBlock nesting c (s.setPos i) else if curr == '/' then - if h : input.atEnd i then eoi s + if h : c.atEnd i then eoi s else - let curr := input.get' i h - if curr == '-' then finishCommentBlock (nesting+1) c (s.next' input i h) + let curr := c.get' i h + if curr == '-' then finishCommentBlock (nesting+1) c (s.next' c i h) else finishCommentBlock nesting c (s.setPos i) else finishCommentBlock nesting c (s.setPos i) where @@ -555,49 +554,46 @@ where /-- Consume whitespace and comments -/ partial def whitespace : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then s + let i := s.pos + if h : c.atEnd i then s else - let curr := input.get' i h + let curr := c.get' i h if curr == '\t' then s.mkUnexpectedError (pushMissing := false) "tabs are not allowed; please configure your editor to expand them" else if curr == '\r' then s.mkUnexpectedError (pushMissing := false) "isolated carriage returns are not allowed" - else if curr.isWhitespace then whitespace c (s.next' input i h) + else if curr.isWhitespace then whitespace c (s.next' c i h) else if curr == '-' then - let i := input.next' i h - let curr := input.get i - if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next input i) + let i := c.next' i h + let curr := c.get i + if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next c i) else s else if curr == '/' then - let i := input.next' i h - let curr := input.get i + let i := c.next' i h + let curr := c.get i if curr == '-' then - let i := input.next i - let curr := input.get i + let i := c.next i + let curr := c.get i if curr == '-' || curr == '!' then s -- "/--" and "/-!" doc comment are actual tokens - else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next input i) + else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next c i) else s else s -def mkEmptySubstringAt (s : String) (p : String.Pos) : Substring := { - str := s, startPos := p, stopPos := p -} +def ParserContext.mkEmptySubstringAt (c : ParserContext) (p : String.Pos) : Substring := + c.substring p p private def rawAux (startPos : String.Pos) (trailingWs : Bool) : ParserFn := fun c s => - let input := c.input let stopPos := s.pos - let leading := mkEmptySubstringAt input startPos - let val := input.extract startPos stopPos + let leading := c.mkEmptySubstringAt startPos + let val := c.extract startPos stopPos if trailingWs then let s := whitespace c s let stopPos' := s.pos - let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring } + let trailing := c.substring (startPos := stopPos) (stopPos := stopPos') let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val s.pushSyntax atom else - let trailing := mkEmptySubstringAt input stopPos + let trailing := c.mkEmptySubstringAt stopPos let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val s.pushSyntax atom @@ -615,12 +611,11 @@ def rawCh (c : Char) (trailingWs := false) : Parser := { } def hexDigitFn : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then s.mkEOIError + let i := s.pos + if h : c.atEnd i then s.mkEOIError else - let curr := input.get' i h - let i := input.next' i h + let curr := c.get' i h + let i := c.next' i h if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i else s.mkUnexpectedError "invalid hexadecimal numeral" @@ -630,17 +625,17 @@ Raises an error if the whitespace does not contain exactly one newline character -/ partial def stringGapFn (seenNewline : Bool) : ParserFn := fun c s => let i := s.pos - if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline + if h : c.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline else - let curr := c.input.get' i h + let curr := c.get' i h if curr == '\n' then if seenNewline then -- Having more than one newline in a string gap is visually confusing s.mkUnexpectedError "unexpected additional newline in string gap" else - stringGapFn true c (s.next' c.input i h) + stringGapFn true c (s.next' c i h) else if curr.isWhitespace then - stringGapFn seenNewline c (s.next' c.input i h) + stringGapFn seenNewline c (s.next' c i h) else if seenNewline then s else @@ -653,17 +648,16 @@ Parses a string quotation after a `\`. in particular `"\" newline whitespace*` gaps. -/ def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then s.mkEOIError + let i := s.pos + if h : c.atEnd i then s.mkEOIError else - let curr := input.get' i h + let curr := c.get' i h if isQuotable curr then - s.next' input i h + s.next' c i h else if curr == 'x' then - andthenFn hexDigitFn hexDigitFn c (s.next' input i h) + andthenFn hexDigitFn hexDigitFn c (s.next' c i h) else if curr == 'u' then - andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h) + andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' c i h) else if inString && curr == '\n' then stringGapFn false c s else @@ -686,39 +680,36 @@ def quotedStringFn : ParserFn := def mkNodeToken (n : SyntaxNodeKind) (startPos : String.Pos) : ParserFn := fun c s => Id.run do if s.hasError then return s - let input := c.input let stopPos := s.pos - let leading := mkEmptySubstringAt input startPos - let val := input.extract startPos stopPos + let leading := c.mkEmptySubstringAt startPos + let val := c.extract startPos stopPos let s := whitespace c s let wsStopPos := s.pos - let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring } + let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos) let info := SourceInfo.original leading startPos trailing stopPos s.pushSyntax (Syntax.mkLit n val info) def charLitFnAux (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then s.mkEOIError + let i := s.pos + if h : c.atEnd i then s.mkEOIError else - let curr := input.get' i h - let s := s.setPos (input.next' i h) + let curr := c.get' i h + let s := s.setPos (c.next' i h) let s := if curr == '\\' then quotedCharFn c s else s if s.hasError then s else let i := s.pos - let curr := input.get i - let s := s.setPos (input.next i) + let curr := c.get i + let s := s.setPos (c.next i) if curr == '\'' then mkNodeToken charLitKind startPos c s else s.mkUnexpectedError "missing end of character literal" partial def strLitFnAux (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos + let i := s.pos + if h : c.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos else - let curr := input.get' i h - let s := s.setPos (input.next' i h) + let curr := c.get' i h + let s := s.setPos (c.next' i h) if curr == '\"' then mkNodeToken strLitKind startPos c s else if curr == '\\' then andthenFn quotedStringFn (strLitFnAux startPos) c s @@ -730,12 +721,12 @@ If we are looking at a valid start to a raw string (`r##...#"`), returns true. We assume `i` begins at the position immediately after `r`. -/ -partial def isRawStrLitStart (input : String) (i : String.Pos) : Bool := - if h : input.atEnd i then false +partial def isRawStrLitStart (c : ParserContext) (i : String.Pos) : Bool := + if h : c.atEnd i then false else - let curr := input.get' i h + let curr := c.get' i h if curr == '#' then - isRawStrLitStart input (input.next' i h) + isRawStrLitStart c (c.next' i h) else curr == '"' @@ -755,12 +746,11 @@ where The `num` variable counts the number of `#`s after the `r`. -/ initState (num : Nat) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then errorUnterminated s + let i := s.pos + if h : c.atEnd i then errorUnterminated s else - let curr := input.get' i h - let s := s.setPos (input.next' i h) + let curr := c.get' i h + let s := s.setPos (c.next' i h) if curr == '#' then initState (num + 1) c s else if curr == '"' then @@ -773,12 +763,11 @@ where the raw string literal, we switch to `closingState`. -/ normalState (num : Nat) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then errorUnterminated s + let i := s.pos + if h : c.atEnd i then errorUnterminated s else - let curr := input.get' i h - let s := s.setPos (input.next' i h) + let curr := c.get' i h + let s := s.setPos (c.next' i h) if curr == '\"' then if num == 0 then mkNodeToken strLitKind startPos c s @@ -792,12 +781,11 @@ where Note: `num > 0` since the `num = 0` case is entirely handled by `normalState`. -/ closingState (num : Nat) (closingNum : Nat) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then errorUnterminated s + let i := s.pos + if h : c.atEnd i then errorUnterminated s else - let curr := input.get' i h - let s := s.setPos (input.next' i h) + let curr := c.get' i h + let s := s.setPos (c.next' i h) if curr == '#' then if closingNum + 1 == num then mkNodeToken strLitKind startPos c s @@ -815,30 +803,28 @@ Note: this does not report that it is expecting `_` if we reach EOI or an unexpe Rationale: this error happens if there is already a `_`, and while sequences of `_` are allowed, it's a bit perverse to suggest extending the sequence. -/ partial def takeDigitsFn (isDigit : Char → Bool) (expecting : String) (needDigit : Bool) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if h : input.atEnd i then + let i := s.pos + if h : c.atEnd i then if needDigit then s.mkEOIError [expecting] else s else - let curr := input.get' i h - if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c.input i h) - else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c.input i h) + let curr := c.get' i h + if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c i h) + else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c i h) else if needDigit then s.mkUnexpectedError "unexpected character" (expected := [expecting]) else s def decimalNumberFn (startPos : String.Pos) (c : ParserContext) : ParserState → ParserState := fun s => - let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s - let input := c.input - let i := s.pos - if h : input.atEnd i then + let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s + let i := s.pos + if h : c.atEnd i then mkNodeToken numLitKind startPos c s else - let curr := input.get' i h - let j := input.next i - if ∃ hj : ¬ input.atEnd j, curr = '.' && input.get' j hj = '.' then + let curr := c.get' i h + let j := c.next i + if ∃ hj : ¬ c.atEnd j, curr = '.' && c.get' j hj = '.' then mkNodeToken numLitKind startPos c s else if curr == '.' || curr == 'e' || curr == 'E' then parseScientific s @@ -851,12 +837,11 @@ where mkNodeToken scientificLitKind startPos c s parseOptDot s := - let input := c.input let i := s.pos - let curr := input.get i + let curr := c.get i if curr == '.' then - let i := input.next i - let curr := input.get i + let i := c.next i + let curr := c.get i if curr.isDigit then takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i) else @@ -865,13 +850,12 @@ where s parseOptExp s := - let input := c.input let i := s.pos - let curr := input.get i + let curr := c.get i if curr == 'e' || curr == 'E' then - let i := input.next i - let i := if input.get i == '-' || input.get i == '+' then input.next i else i - let curr := input.get i + let i := c.next i + let i := if c.get i == '-' || c.get i == '+' then c.next i else i + let curr := c.get i if curr.isDigit then takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i) else @@ -892,36 +876,35 @@ def hexNumberFn (startPos : String.Pos) : ParserFn := fun c s => mkNodeToken numLitKind startPos c s def numberFnAux : ParserFn := fun c s => - let input := c.input let startPos := s.pos - if h : input.atEnd startPos then s.mkEOIError + if h : c.atEnd startPos then s.mkEOIError else - let curr := input.get' startPos h + let curr := c.get' startPos h if curr == '0' then - let i := input.next' startPos h - let curr := input.get i + let i := c.next' startPos h + let curr := c.get i if curr == 'b' || curr == 'B' then - binNumberFn startPos c (s.next input i) + binNumberFn startPos c (s.next c i) else if curr == 'o' || curr == 'O' then - octalNumberFn startPos c (s.next input i) + octalNumberFn startPos c (s.next c i) else if curr == 'x' || curr == 'X' then - hexNumberFn startPos c (s.next input i) + hexNumberFn startPos c (s.next c i) else decimalNumberFn startPos c (s.setPos i) else if curr.isDigit then - decimalNumberFn startPos c (s.next input startPos) + decimalNumberFn startPos c (s.next c startPos) else s.mkError "numeral" -def isIdCont : String → ParserState → Bool := fun input s => +def isIdCont : ParserContext → ParserState → Bool := fun c s => let i := s.pos - let curr := input.get i + let curr := c.get i if curr == '.' then - let i := input.next i - if input.atEnd i then + let i := c.next i + if c.atEnd i then false else - let curr := input.get i + let curr := c.get i isIdFirst curr || isIdBeginEscape curr else false @@ -942,13 +925,12 @@ def mkTokenAndFixPos (startPos : String.Pos) (tk : Option Token) : ParserFn := f if c.forbiddenTk? == some tk then s.mkErrorAt "forbidden token" startPos else - let input := c.input - let leading := mkEmptySubstringAt input startPos + let leading := c.mkEmptySubstringAt startPos let stopPos := startPos + tk let s := s.setPos stopPos let s := whitespace c s let wsStopPos := s.pos - let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring } + let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos) let atom := mkAtom (SourceInfo.original leading startPos trailing stopPos) tk s.pushSyntax atom @@ -957,45 +939,43 @@ def mkIdResult (startPos : String.Pos) (tk : Option Token) (val : Name) : Parser if isToken startPos stopPos tk then mkTokenAndFixPos startPos tk c s else - let input := c.input - let rawVal := { str := input, startPos := startPos, stopPos := stopPos : Substring } + let rawVal := c.substring startPos stopPos let s := whitespace c s let trailingStopPos := s.pos - let leading := mkEmptySubstringAt input startPos - let trailing := { str := input, startPos := stopPos, stopPos := trailingStopPos : Substring } + let leading := c.mkEmptySubstringAt startPos + let trailing := c.substring (startPos := stopPos) (stopPos := trailingStopPos) let info := SourceInfo.original leading startPos trailing stopPos let atom := mkIdent info rawVal val s.pushSyntax atom partial def identFnAux (startPos : String.Pos) (tk : Option Token) (r : Name) : ParserFn := let rec parse (r : Name) (c s) := - let input := c.input - let i := s.pos - if h : input.atEnd i then + let i := s.pos + if h : c.atEnd i then s.mkEOIError else - let curr := input.get' i h + let curr := c.get' i h if isIdBeginEscape curr then - let startPart := input.next' i h + let startPart := c.next' i h let s := takeUntilFn isIdEndEscape c (s.setPos startPart) - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkUnexpectedErrorAt "unterminated identifier escape" startPart else let stopPart := s.pos - let s := s.next' c.input s.pos h - let r := .str r (input.extract startPart stopPart) - if isIdCont input s then - let s := s.next input s.pos + let s := s.next' c s.pos h + let r := .str r (c.extract startPart stopPart) + if isIdCont c s then + let s := s.next c s.pos parse r c s else mkIdResult startPos tk r c s else if isIdFirst curr then let startPart := i - let s := takeWhileFn isIdRest c (s.next input i) + let s := takeWhileFn isIdRest c (s.next c i) let stopPart := s.pos - let r := .str r (input.extract startPart stopPart) - if isIdCont input s then - let s := s.next input s.pos + let r := .str r (c.extract startPart stopPart) + if isIdCont c s then + let s := s.next c s.pos parse r c s else mkIdResult startPos tk r c s @@ -1007,8 +987,7 @@ private def isIdFirstOrBeginEscape (c : Char) : Bool := isIdFirst c || isIdBeginEscape c private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - let s := identFnAux startPos none .anonymous c (s.next input startPos) + let s := identFnAux startPos none .anonymous c (s.next c startPos) if s.hasError then s else @@ -1020,21 +999,23 @@ private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s => | _ => s.mkError "invalid Name literal" private def tokenFnAux : ParserFn := fun c s => - let input := c.input let i := s.pos - let curr := input.get i + let curr := c.get i if curr == '\"' then - strLitFnAux i c (s.next input i) - else if curr == '\'' && getNext input i != '\'' then - charLitFnAux i c (s.next input i) + strLitFnAux i c (s.next c i) + else if curr == '\'' && c.getNext i != '\'' then + charLitFnAux i c (s.next c i) else if curr.isDigit then numberFnAux c s - else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then + else if curr == '`' && isIdFirstOrBeginEscape (c.getNext i) then nameLitAux i c s - else if curr == 'r' && isRawStrLitStart input (input.next i) then - rawStrLitFnAux i c (s.next input i) + else if curr == 'r' && isRawStrLitStart c (c.next i) then + rawStrLitFnAux i c (s.next c i) else - let tk := c.tokens.matchPrefix input i + let tk := c.tokens.matchPrefix c.inputString i c.endPos.byteIdx <| by + let ⟨⟨{inputString, endPos := ⟨endPos⟩, endPos_valid, ..}, _, _, _⟩⟩ := c + rw [String.endPos] at endPos_valid + omega identFnAux i tk .anonymous c s private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState := @@ -1048,9 +1029,8 @@ private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserS | other => other def tokenFn (expected : List String := []) : ParserFn := fun c s => - let input := c.input - let i := s.pos - if input.atEnd i then s.mkEOIError expected + let i := s.pos + if c.atEnd i then s.mkEOIError expected else let tkc := s.cache.tokenCache if tkc.startPos == i then @@ -1078,9 +1058,8 @@ def peekToken (c : ParserContext) (s : ParserState) : ParserState × Except Pars /-- Treat keywords as identifiers. -/ def rawIdentFn : ParserFn := fun c s => - let input := c.input - let i := s.pos - if input.atEnd i then s.mkEOIError + let i := s.pos + if c.atEnd i then s.mkEOIError else identFnAux i none .anonymous c s def satisfySymbolFn (p : String → Bool) (expected : List String) : ParserFn := fun c s => Id.run do @@ -1158,10 +1137,9 @@ partial def strAux (sym : String) (errorMsg : String) (j : String.Pos) :ParserFn if h₁ : sym.atEnd j then s else let i := s.pos - let input := c.input - if h₂ : input.atEnd i then s.mkError errorMsg - else if sym.get' j h₁ != input.get' i h₂ then s.mkError errorMsg - else parse (sym.next' j h₁) c (s.next' input i h₂) + if h₂ : c.atEnd i then s.mkError errorMsg + else if sym.get' j h₁ != c.get' i h₂ then s.mkError errorMsg + else parse (sym.next' j h₁) c (s.next' c i h₂) parse j def checkTailWs (prev : Syntax) : Bool := @@ -1306,7 +1284,6 @@ def identEq (id : Name) : Parser := { } def hygieneInfoFn : ParserFn := fun c s => Id.run do - let input := c.input let finish pos str trailing s := -- Builds an actual hygieneInfo node from empty string `str` and trailing whitespace `trailing`. let info := SourceInfo.original str pos trailing pos @@ -1321,13 +1298,13 @@ def hygieneInfoFn : ParserFn := fun c s => Id.run do if !s.stxStack.isEmpty then let prev := s.stxStack.back if let .original leading pos trailing endPos := prev.getTailInfo then - let str := mkEmptySubstringAt input endPos + let str := c.mkEmptySubstringAt endPos -- steal the trailing whitespace from the last node and use it for this node let s := s.popSyntax.pushSyntax <| prev.setTailInfo (.original leading pos str endPos) return finish endPos str trailing s -- The stack can be empty if this is either the first token, or if we are in a fresh cache. -- In that case we just put the hygieneInfo at the current location. - let str := mkEmptySubstringAt input s.pos + let str := c.mkEmptySubstringAt s.pos finish s.pos str str s def hygieneInfoNoAntiquot : Parser := { @@ -1577,7 +1554,7 @@ This parser has the same arity as `p` - it just forwards the results of `p`. -/ def eoiFn : ParserFn := fun c s => let i := s.pos - if c.input.atEnd i then s + if c.atEnd i then s else s.mkError "expected end of file" def eoi : Parser := { @@ -1736,11 +1713,10 @@ def termParser (prec : Nat := 0) : Parser := fn := fun c s => let prev := s.stxStack.back if checkTailNoWs prev then - let input := c.input - let i := s.pos - if h : input.atEnd i then s + let i := s.pos + if h : c.atEnd i then s else - let curr := input.get' i h + let curr := c.get' i h if curr == ':' then s.mkUnexpectedError "unexpected ':'" else s @@ -1775,7 +1751,7 @@ def tokenAntiquotFn : ParserFn := fun c s => Id.run do def tokenWithAntiquot : Parser → Parser := withFn fun f c s => let s := f c s -- fast check that is false in most cases - if c.input.get s.pos == '%' then + if c.get s.pos == '%' then tokenAntiquotFn c s else s @@ -1811,7 +1787,7 @@ def unicodeSymbol (sym asciiSym : String) : Parser := def withAntiquotFn (antiquotP p : ParserFn) (isCatAntiquot := false) : ParserFn := fun c s => -- fast check that is false in most cases - if c.input.get s.pos == '$' then + if c.get s.pos == '$' then -- Do not allow antiquotation choice nodes here as `antiquotP` is the strictly more general -- antiquotation than any in `p`. -- If it is a category antiquotation, do not backtrack into the category at all as that would @@ -1954,7 +1930,7 @@ def prattParser (kind : Name) (tables : PrattParsingTables) (behavior : LeadingI def fieldIdxFn : ParserFn := fun c s => let initStackSz := s.stackSize let iniPos := s.pos - let curr := c.input.get iniPos + let curr := c.get iniPos if curr.isDigit && curr != '0' then let s := takeWhileFn (fun c => c.isDigit) c s mkNodeToken fieldIdxKind iniPos c s diff --git a/src/Lean/Parser/Extension.lean b/src/Lean/Parser/Extension.lean index 3a467f479a..2a5dadbefb 100644 --- a/src/Lean/Parser/Extension.lean +++ b/src/Lean/Parser/Extension.lean @@ -443,12 +443,27 @@ def getSyntaxNodeKinds (env : Environment) : List SyntaxNodeKind := def getTokenTable (env : Environment) : TokenTable := (parserExtension.getState env).tokens +set_option linter.unusedVariables.funArgs false in -- Note: `crlfToLf` preserves logical line and column numbers for each character. -def mkInputContext (input : String) (fileName : String) (normalizeLineEndings := true) : InputContext := - let input' := if normalizeLineEndings then input.crlfToLf else input - { input := input', - fileName := fileName, - fileMap := input'.toFileMap } +def mkInputContext (input : String) (fileName : String) + (normalizeLineEndings := true) + (endPos := input.endPos) + (endPos_valid : endPos ≤ input.endPos := by simp) : + InputContext := + let text := FileMap.ofString input + let next := if normalizeLineEndings then + -- Convert the stop position to a line/column position so crlf translation doesn't invalidate it + let endPos' := text.toPosition endPos + let text := FileMap.ofString text.source.crlfToLf + (text, text.ofPosition endPos') + else + (text, endPos) + let text := next.1 + let endPos' := next.2 + if h : endPos' ≤ text.source.endPos then + .mk text.source fileName (fileMap := text) (endPos := endPos') (endPos_valid := h) + else + .mk text.source fileName (fileMap := text) def mkParserState (input : String) : ParserState := { cache := initCacheForInput input } @@ -460,7 +475,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input) if !s.allErrors.isEmpty then Except.error (s.toErrorMsg ictx) - else if ictx.input.atEnd s.pos then + else if ictx.atEnd s.pos then Except.ok s.stxStack.back else Except.error ((s.mkError "end of input").toErrorMsg ictx) diff --git a/src/Lean/Parser/Module.lean b/src/Lean/Parser/Module.lean index bfb1750ece..f0015ed8a0 100644 --- a/src/Lean/Parser/Module.lean +++ b/src/Lean/Parser/Module.lean @@ -83,7 +83,7 @@ def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × Modul let dummyEnv ← mkEmptyEnvironment let p := andthenFn whitespace Module.header.fn let tokens := Module.updateTokens (getTokenTable dummyEnv) - let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input) + let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.inputString) let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back let mut messages : MessageLog := {} for (pos, stk, err) in s.allErrors do @@ -125,7 +125,7 @@ def isTerminalCommand (s : Syntax) : Bool := s.isOfKind ``Command.exit || s.isOfKind ``Command.import || s.isOfKind ``Command.eoi private def consumeInput (inputCtx : InputContext) (pmctx : ParserModuleContext) (pos : String.Pos) : String.Pos := - let s : ParserState := { cache := initCacheForInput inputCtx.input, pos := pos } + let s : ParserState := { cache := initCacheForInput inputCtx.inputString, pos := pos } let s := tokenFn [] |>.run inputCtx pmctx (getTokenTable pmctx.env) s match s.errorMsg with | some _ => pos + ' ' @@ -140,12 +140,12 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) let mut messages := messages let mut stx := Syntax.missing -- will always be assigned below repeat - if inputCtx.input.atEnd pos then + if inputCtx.atEnd pos then stx := mkEOI pos break let pos' := pos let p := andthenFn whitespace topLevelCommandParserFn - let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos } + let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.inputString, pos } -- save errors from sub-recoveries for (rpos, rstk, recovered) in s.recoveredErrors do messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered diff --git a/src/Lean/Parser/StrInterpolation.lean b/src/Lean/Parser/StrInterpolation.lean index 41986886d1..f8906f1dec 100644 --- a/src/Lean/Parser/StrInterpolation.lean +++ b/src/Lean/Parser/StrInterpolation.lean @@ -15,16 +15,15 @@ def isQuotableCharForStrInterpolant (c : Char) : Bool := c == '{' || isQuotableCharDefault c partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s => - let input := c.input let stackSize := s.stackSize let rec parse (startPos : String.Pos) (c : ParserContext) (s : ParserState) : ParserState := - let i := s.pos - if input.atEnd i then + let i := s.pos + if c.atEnd i then let s := s.mkError "unterminated string literal" s.mkNode interpolatedStrKind stackSize else - let curr := input.get i - let s := s.setPos (input.next i) + let curr := c.get i + let s := s.setPos (c.next i) if curr == '\"' then let s := mkNodeToken interpolatedStrLitKind startPos c s s.mkNode interpolatedStrKind stackSize @@ -36,9 +35,9 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s => if s.hasError then s else let i := s.pos - let curr := input.get i + let curr := c.get i if curr == '}' then - let s := s.setPos (input.next i) + let s := s.setPos (c.next i) parse i c s else let s := s.mkError "'}'" @@ -46,14 +45,14 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s => else parse startPos c s let startPos := s.pos - if input.atEnd startPos then + if c.atEnd startPos then s.mkEOIError else - let curr := input.get s.pos; + let curr := c.get s.pos; if curr != '\"' then s.mkError "interpolated string" else - let s := s.next input startPos + let s := s.next c startPos parse startPos c s @[inline] def interpolatedStrNoAntiquot (p : Parser) : Parser := { diff --git a/src/Lean/Parser/Types.lean b/src/Lean/Parser/Types.lean index 8199307353..383161ec43 100644 --- a/src/Lean/Parser/Types.lean +++ b/src/Lean/Parser/Types.lean @@ -48,15 +48,127 @@ def SyntaxNodeKindSet.insert (s : SyntaxNodeKindSet) (k : SyntaxNodeKind) : Synt Input string and related data. Recall that the `FileMap` is a helper structure for mapping `String.Pos` in the input string to line/column information. -/ structure InputContext where - input : String + mk' :: + inputString : String fileName : String - fileMap : FileMap - endPos : String.Pos := input.endPos - endPos_valid : endPos ≤ input.endPos := by simp + fileMap : FileMap + endPos : String.Pos := inputString.endPos + endPos_valid : endPos ≤ inputString.endPos := by simp instance : Inhabited InputContext where default := ⟨"", default, default, "".endPos, String.Pos.mk_le_mk.mpr (Nat.le_refl _)⟩ + +namespace InputContext +def mk + (input fileName : String) + (endPos : String.Pos := input.endPos) + (endPos_valid : endPos ≤ input.endPos := by simp) + (fileMap : FileMap := FileMap.ofString input) : + InputContext where + inputString := input + endPos := endPos + endPos_valid := endPos_valid + fileName := fileName + fileMap := fileMap + +@[deprecated "Call `InputContext.get`, `InputContext.get'`, `InputContext.next'`, and `InputContext.atEnd` directly" (since := "2025-08-21")] +def input (c : InputContext) : String := c.inputString.extract 0 c.endPos + +/-- +Returns `true` if a specified byte position is greater than or equal to the position which points to +the end of the input string. Otherwise, returns `false`. +-/ +def atEnd (c : InputContext) (p : String.Pos) : Bool := p ≥ c.endPos + +/-- +Returns the character at position `p` of the input string. If `p` is not a valid position, returns +the fallback value `(default : Char)`, which is `'A'`, but does not panic. +-/ +@[inline] +def get (c : InputContext) (p : String.Pos) : Char := c.inputString.get p + +theorem not_atEnd_inputString {c : InputContext} {p : String.Pos} : + ¬c.atEnd p → ¬c.inputString.atEnd p := by + intro h + let {inputString, endPos := ⟨e⟩, endPos_valid, ..} := c + cases p + simp only [String.endPos, String.Pos.mk_le_mk] at endPos_valid + simp_all [String.atEnd, InputContext.atEnd, String.Pos.mk_le_mk] + exact Nat.lt_of_lt_of_le h endPos_valid + +/-- +Returns the character at position `p` of the input string. Returns `(default : Char)`, which is +`'A'`, if `p` is not a valid position. + +Requires evidence, `h`, that `p` is within bounds instead of performing a run-time bounds check as +in `InputContext.get`. + +A typical pattern combines `get'` with a dependent `if`-expression to avoid the overhead of an +additional bounds check. For example: +``` +def getInBounds? (s : String) (p : String.Pos) : Option Char := + if h : s.atEnd p then none else some (s.get' p h) +``` +Even with evidence of `¬ s.atEnd p`, `p` may be invalid if a byte index points into the middle of a +multi-byte UTF-8 character. +-/ +@[inline] +def get' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : Char := + c.inputString.get' p (c.not_atEnd_inputString h) + +/-- +Returns the next position in the input string after position `p`. If `p` is not a valid position or +`p = c.endPos`, returns the position one byte after `p`. + +A run-time bounds check is performed to determine whether `p` is at the end of the string. If a +bounds check has already been performed, use `InputContext.next'` to avoid a repeated check. +-/ +@[inline] +def next (c : InputContext) (p : String.Pos) : String.Pos := + c.inputString.next p + +/-- +Returns the next position in the input string after position `p`. The result is unspecified if `p` +is not a valid position. + +Requires evidence, `h`, that `p` is within bounds. No run-time bounds check is performed. +-/ +@[inline] +def next' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : String.Pos := + c.inputString.next' p (c.not_atEnd_inputString h) + +/-- +Creates a new string that consists of the region of the input string delimited by the two positions. + +The result is `""` if the start position is greater than or equal to the end position or if the +start position is at the end of the string. If either position is invalid (that is, if either points +at the middle of a multi-byte UTF-8 character) then the result is unspecified. +-/ +@[inline] +def extract (c : InputContext) : String.Pos → String.Pos → String := + c.inputString.extract + +/-- +Extracts a substring of the input string, bounded by `startPos` and `stopPos`. +-/ +@[inline] +def substring (c : InputContext) (startPos stopPos : String.Pos) : Substring := + { str := c.inputString, startPos, stopPos := min stopPos c.endPos } + +/-- Return character after position `pos` -/ +@[inline] +def getNext (input : InputContext) (pos : String.Pos) : Char := + input.get (input.next pos) + +/-- Returns the character position prior to `pos` -/ +@[inline] +def prev (c : InputContext) (pos : String.Pos) : String.Pos := + c.inputString.prev pos + +end InputContext + + /-- Input context derived from elaboration of previous commands. -/ structure ParserModuleContext where env : Environment @@ -82,6 +194,16 @@ structure ParserContextCore extends InputContext, ParserModuleContext, Cacheable /-- Opaque parser context updateable using `adaptCacheableContextFn` and `adaptUncacheableContextFn`. -/ structure ParserContext extends ParserContextCore where private mk :: +instance : Coe ParserContext InputContext := ⟨(·.toInputContext)⟩ + +/-- +Modifies the ending position of a parser context. +-/ +def ParserContext.setEndPos (c : ParserContext) + (endPos : String.Pos) (endPos_valid : endPos ≤ c.inputString.endPos) : + ParserContext := + { c with endPos, endPos_valid } + structure Error where /-- If not `missing`, used for lazily calculating `unexpected` message and range in `mkErrorMessage`. @@ -237,11 +359,11 @@ def popSyntax (s : ParserState) : ParserState := def shrinkStack (s : ParserState) (iniStackSz : Nat) : ParserState := { s with stxStack := s.stxStack.shrink iniStackSz } -def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState := - { s with pos := input.next pos } +def next (s : ParserState) (c : ParserContext) (pos : String.Pos) : ParserState := + { s with pos := c.next pos } -def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState := - { s with pos := input.next' pos h } +def next' (s : ParserState) (c : ParserContext) (pos : String.Pos) (h : ¬ c.atEnd pos) : ParserState := + { s with pos := c.next' pos h } def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := match s with diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index 6f4dc0cffd..6f3d035657 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -352,14 +352,12 @@ def trailingNode.formatter (k : SyntaxNodeKind) (_ _ : Nat) (p : Formatter) : Fo def parseToken (s : String) : FormatterM ParserState := -- include comment tokens, e.g. when formatting `- -0` - return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run { - input := s, - fileName := "", - fileMap := FileMap.ofString "" - } { - env := ← getEnv, - options := ← getOptions - } ((← read).table) (Parser.mkParserState s) + let ictx := .mk s "" (fileMap := FileMap.ofString "") + return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run ictx + { + env := ← getEnv, + options := ← getOptions + } ((← read).table) (Parser.mkParserState s) def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit := do if let SourceInfo.original _ _ ss _ := info then diff --git a/src/Lean/Server/Utils.lean b/src/Lean/Server/Utils.lean index b344b0d4fe..b34661c4b2 100644 --- a/src/Lean/Server/Utils.lean +++ b/src/Lean/Server/Utils.lean @@ -97,10 +97,11 @@ structure DocumentMeta where deriving Inhabited /-- Extracts an `InputContext` from `doc`. -/ -def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext where - input := doc.text.source - fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString - fileMap := doc.text +def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext := + .mk + (input := doc.text.source) + (fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString) + (fileMap := doc.text) /-- Replaces the range `r` (using LSP UTF-16 positions) in `text` (using UTF-8 positions) diff --git a/src/lake/Lake/Toml/Grammar.lean b/src/lake/Lake/Toml/Grammar.lean index 7d835fe5f2..88b758ab63 100644 --- a/src/lake/Lake/Toml/Grammar.lean +++ b/src/lake/Lake/Toml/Grammar.lean @@ -39,28 +39,26 @@ public def wsFn : ParserFn := /-- Consumes the LF following a CR in a CRLF newline. -/ def crlfAuxFn : ParserFn := fun c s => - let input := c.input let errMsg := "invalid newline; no LF after CR" - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkUnexpectedError errMsg else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\n' then - s.next' input s.pos h + s.next' c s.pos h else s.mkUnexpectedError errMsg /-- Consume a newline. -/ public def newlineFn : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError ["newline"] else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\n' then - s.next' input s.pos h + s.next' c s.pos h else if curr == '\r' then - crlfAuxFn c (s.next' input s.pos h) + crlfAuxFn c (s.next' c s.pos h) else mkUnexpectedCharError s curr ["newline"] @@ -76,31 +74,29 @@ public def commentFn : ParserFn := /-- Consume optional whitespace (space, tab, or newline). -/ public partial def wsNewlineFn : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr = ' ' || curr = '\t' || curr == '\n' then - wsNewlineFn c (s.next' input s.pos h) + wsNewlineFn c (s.next' c s.pos h) else if curr == '\r' then - (crlfAuxFn >> wsNewlineFn) c (s.next' input s.pos h) + (crlfAuxFn >> wsNewlineFn) c (s.next' c s.pos h) else s /-- Consume optional sequence of whitespace / newline(s) / comment (s). -/ public partial def trailingFn : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr = ' ' || curr = '\t' || curr == '\n' then - trailingFn c (s.next' input s.pos h) + trailingFn c (s.next' c s.pos h) else if curr == '\r' then - (crlfAuxFn >> trailingFn) c (s.next' input s.pos h) + (crlfAuxFn >> trailingFn) c (s.next' c s.pos h) else if curr == '#' then - (commentBodyFn >> trailingFn) c (s.next' input s.pos h) + (commentBodyFn >> trailingFn) c (s.next' c s.pos h) else s @@ -114,23 +110,22 @@ public def isEscapeChar (c : Char) : Bool := /-- Consumes a TOML string escape sequence after a `\`. -/ def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s => - let input := c.input let expected := ["escape sequence"] - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError expected else - let curr := input.get' s.pos h + let curr := c.get' s.pos h let ifStringGap (p : ParserFn) := if stringGap then - p c (s.next' input s.pos h) + p c (s.next' c s.pos h) else s.mkUnexpectedError "string gap is forbidden here" expected if isEscapeChar curr then - s.next' input s.pos h + s.next' c s.pos h else if curr == 'u' then - repeatFn 4 hexDigitFn c (s.next' input s.pos h) + repeatFn 4 hexDigitFn c (s.next' c s.pos h) else if curr == 'U' then - repeatFn 8 hexDigitFn c (s.next' input s.pos h) + repeatFn 8 hexDigitFn c (s.next' c s.pos h) else if curr == ' ' || curr == '\t' then ifStringGap (wsFn >> newlineFn >> wsNewlineFn) else if curr == '\n' then @@ -141,50 +136,47 @@ def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s => s.mkUnexpectedError "invalid escape sequence" partial def basicStringAuxFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkUnexpectedErrorAt "unterminated basic string" startPos else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\"' then - s.next' input s.pos h + s.next' c s.pos h else if curr == '\\' then - (escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' input s.pos h) + (escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' c s.pos h) else if isControlChar curr then mkUnexpectedCharError s curr else - basicStringAuxFn startPos c (s.next' input s.pos h) + basicStringAuxFn startPos c (s.next' c s.pos h) public def basicStringFn : ParserFn := usePosFn fun startPos => chFn '\"' ["basic string"] >> basicStringAuxFn startPos partial def literalStringAuxFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkUnexpectedErrorAt "unterminated literal string" startPos else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\'' then - s.next' input s.pos h + s.next' c s.pos h else if isControlChar curr then mkUnexpectedCharError s curr else - literalStringAuxFn startPos c (s.next' input s.pos h) + literalStringAuxFn startPos c (s.next' c s.pos h) public def literalStringFn : ParserFn := usePosFn fun startPos => chFn '\'' ["literal string"] >> literalStringAuxFn startPos partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then if quoteDepth ≥ 3 then s else s.mkUnexpectedErrorAt "unterminated multi-line literal string" startPos else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\'' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h if quoteDepth ≥ 5 then s.mkUnexpectedError "too many quotes" else @@ -192,29 +184,28 @@ partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : P else if quoteDepth ≥ 3 then s else if curr == '\n' then - mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h) + mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h) else if curr == '\r' then - (crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' input s.pos h) + (crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' c s.pos h) else if isControlChar curr then mkUnexpectedCharError s curr else - mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h) + mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h) public def mlLiteralStringFn : ParserFn := usePosFn fun startPos => atomicFn (repeatFn 3 (chFn '\'' ["multi-line literal string"])) >> mlLiteralStringAuxFn startPos 0 partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then if quoteDepth ≥ 3 then s else s.mkUnexpectedErrorAt "unterminated multi-line basic string" startPos else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '\"' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h if quoteDepth ≥ 5 then s.mkUnexpectedError "too many quotes" else @@ -222,15 +213,15 @@ partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : Pars else if quoteDepth ≥ 3 then s else if curr == '\n' then - mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h) + mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h) else if curr == '\r' then - (crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h) + (crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h) else if curr == '\\' then - (escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h) + (escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h) else if isControlChar curr then mkUnexpectedCharError s curr else - mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h) + mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h) public def mlBasicStringFn : ParserFn := usePosFn fun startPos => atomicFn (repeatFn 3 (chFn '\"' ["multi-line basic string"])) >> @@ -244,19 +235,18 @@ def hourMinFn : ParserFn := digitPairFn ["hour digit"] >> chFn ':' >> digitPairFn ["minute digit"] def timeTailFn (allowOffset : Bool) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr = '.' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h let s := takeWhile1Fn (·.isDigit) ["millisecond"] c s if s.hasError then s else - if h : input.atEnd s.pos then s else - timeOffsetFn (input.get' s.pos h) (input.next' s.pos h) c s + if h : c.atEnd s.pos then s else + timeOffsetFn (c.get' s.pos h) (c.next' s.pos h) c s else - timeOffsetFn curr (input.next' s.pos h) c s + timeOffsetFn curr (c.next' s.pos h) c s where @[inline] timeOffsetFn curr nextPos c s := if curr == 'Z' || curr == 'z' then @@ -277,15 +267,14 @@ public def timeFn (allowOffset := false) : ParserFn := def optTimeFn : ParserFn := fun c s => let i := s.pos - let input := c.input - if h : input.atEnd i then + if h : c.atEnd i then s else - let curr := input.get' i h + let curr := c.get' i h if curr = 'T' || curr = 't' then - timeFn true c (s.next' input i h) + timeFn true c (s.next' c i h) else if curr = ' ' then - let tPos := input.next' i h + let tPos := c.next' i h let s := timeFn true c (s.setPos tPos) if s.hasError && s.pos == tPos then s.restore (s.stackSize-1) i else s else @@ -299,29 +288,27 @@ public def dateTimeFn : ParserFn := repeatFn 4 (digitFn ["year digit"]) >> chFn '-' >> dateTimeAuxFn def decExpFn : ParserFn := fun c s => - let input := c.input let expected := ["decimal exponent"] - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError expected else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr = '-' || curr == '+' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h sepByChar1Fn (·.isDigit) '_' expected c s else if curr.isDigit then - let s := s.next' input s.pos h + let s := s.next' c s.pos h sepByChar1AuxFn (·.isDigit) '_' expected c s else mkUnexpectedCharError s curr expected def optDecExpFn : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == 'e' || curr == 'E' then - decExpFn c (s.next' input s.pos h) + decExpFn c (s.next' c s.pos h) else s @@ -342,11 +329,10 @@ def decNumberTailAuxFn (startPos : String.Pos) (curr : Char) (nextPos : String.P pushLit `Lake.Toml.decInt startPos skipFn c s def decNumberTailFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then pushLit `Lake.Toml.decInt startPos skipFn c s else - decNumberTailAuxFn startPos (input.get' s.pos h) (input.next' s.pos h) c s + decNumberTailAuxFn startPos (c.get' s.pos h) (c.next' s.pos h) c s mutual @@ -358,27 +344,25 @@ partial def decNumberSepFn (startPos : String.Pos) (curr : Char) (nextPos : Stri decNumberTailAuxFn startPos curr nextPos c s partial def decNumberAuxFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then pushLit `Lake.Toml.decInt startPos skipFn c s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr.isDigit then - let s := s.next' input s.pos h + let s := s.next' c s.pos h decNumberAuxFn startPos c s else - decNumberSepFn startPos curr (input.next' s.pos h) c s + decNumberSepFn startPos curr (c.next' s.pos h) c s partial def decNumberFn (startPos : String.Pos) : ParserFn := fun c s => let i := s.pos - let input := c.input let expected := ["decimal integer", "float"] - if h : input.atEnd i then + if h : c.atEnd i then s.mkEOIError expected else - let curr := input.get' i h + let curr := c.get' i h if curr.isDigit then - let s := s.next' input i h + let s := s.next' c i h decNumberAuxFn startPos c s else mkUnexpectedCharError s curr expected @@ -392,37 +376,35 @@ def nanAuxFn (startPos : String.Pos) : ParserFn := strFn "an" ["'nan'"] >> pushLit `Lake.Toml.float startPos def decimalFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input let expected := ["decimal integer", "float"] - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError expected else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '0' then - decNumberTailFn startPos c (s.next' input s.pos h) + decNumberTailFn startPos c (s.next' c s.pos h) else if curr.isDigit then - decNumberAuxFn startPos c (s.next' input s.pos h) + decNumberAuxFn startPos c (s.next' c s.pos h) else if curr == 'i' then - infAuxFn startPos c (s.next' input s.pos h) + infAuxFn startPos c (s.next' c s.pos h) else if curr == 'n' then - nanAuxFn startPos c (s.next' input s.pos h) + nanAuxFn startPos c (s.next' c s.pos h) else mkUnexpectedCharError s curr expected def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s => - let input := c.input - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError ["decimal integer", "float", "date-time"] else -- `NN` - let curr := input.get' s.pos h - let nextPos := input.next' s.pos h + let curr := c.get' s.pos h + let nextPos := c.next' s.pos h if curr.isDigit then let s := s.setPos nextPos - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then pushLit `Lake.Toml.decInt startPos skipFn c s else - let curr := input.get' s.pos h - let nextPos := input.next' s.pos h + let curr := c.get' s.pos h + let nextPos := c.next' s.pos h if curr == ':' then -- `HH:` let s := s.setPos nextPos let s := timeAuxFn false c s @@ -430,18 +412,18 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s => pushLit `Lake.Toml.dateTime startPos skipFn c s else if curr.isDigit then -- `NNN` let s := s.setPos nextPos - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then pushLit `Lake.Toml.decInt startPos skipFn c s else - let curr := input.get' s.pos h - let nextPos := input.next' s.pos h + let curr := c.get' s.pos h + let nextPos := c.next' s.pos h if curr.isDigit then -- `NNNN` let s := s.setPos nextPos - if h : input.atEnd nextPos then + if h : c.atEnd nextPos then pushLit `Lake.Toml.decInt startPos skipFn c s else - let curr := input.get' s.pos h - let nextPos := input.next' s.pos h + let curr := c.get' s.pos h + let nextPos := c.next' s.pos h if curr == '-' then -- `YYYY-` let s := s.setPos nextPos let s := dateTimeAuxFn c s @@ -460,49 +442,48 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s => decNumberSepFn startPos curr nextPos c s public def numeralFn : ParserFn := atomicFn fun c s => - let input := c.input let startPos := s.pos let expected := ["integer", "float", "date-time"] - if h : input.atEnd s.pos then + if h : c.atEnd s.pos then s.mkEOIError expected else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == '0' then - let s := s.next' input startPos h - if h : input.atEnd s.pos then + let s := s.next' c startPos h + if h : c.atEnd s.pos then pushLit `Lake.Toml.decInt startPos skipFn c s else - let curr := input.get' s.pos h + let curr := c.get' s.pos h if curr == 'b' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h let s := sepByChar1Fn isBinDigit '_' ["binary integer"] c s if s.hasError then s else pushLit `Lake.Toml.binNum startPos skipFn c s else if curr == 'o' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h let s := sepByChar1Fn isOctDigit '_' ["octal integer"] c s if s.hasError then s else pushLit `Lake.Toml.octNum startPos skipFn c s else if curr == 'x' then - let s := s.next' input s.pos h + let s := s.next' c s.pos h let s := sepByChar1Fn isHexDigit '_' ["hexadecimal integer"] c s if s.hasError then s else pushLit `Lake.Toml.hexNum startPos skipFn c s else if curr.isDigit then - let s := s.next' input s.pos h + let s := s.next' c s.pos h let s := (chFn ':' >> timeAuxFn false) c s if s.hasError then s else pushLit `Lake.Toml.dateTime startPos skipFn c s else - decNumberTailAuxFn startPos curr (input.next' s.pos h) c s + decNumberTailAuxFn startPos curr (c.next' s.pos h) c s else if curr.isDigit then - decNumeralAuxFn startPos c (s.next' input s.pos h) + decNumeralAuxFn startPos c (s.next' c s.pos h) else if curr == '+' || curr == '-' then - decimalFn startPos c (s.next' input s.pos h) + decimalFn startPos c (s.next' c s.pos h) else if curr == 'i' then - infAuxFn startPos c (s.next' input s.pos h) + infAuxFn startPos c (s.next' c s.pos h) else if curr == 'n' then - nanAuxFn startPos c (s.next' input s.pos h) + nanAuxFn startPos c (s.next' c s.pos h) else s.mkUnexpectedError s!"unexpected '{curr}'" expected diff --git a/src/lake/Lake/Toml/Load.lean b/src/lake/Lake/Toml/Load.lean index d1ef8e859b..7d32d5d447 100644 --- a/src/lake/Lake/Toml/Load.lean +++ b/src/lake/Lake/Toml/Load.lean @@ -24,10 +24,10 @@ public def loadToml (ictx : InputContext) : EIO MessageLog Table := do | .ok env => pure env | .error e => throw <| MessageLog.empty.add <| mkMessageNoPos ictx <| m!"failed to initialize TOML environment: {e}" - let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.input) + let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.inputString) if let some errorMsg := s.errorMsg then throw <| MessageLog.empty.add <| mkParserErrorMessage ictx s errorMsg - else if ictx.input.atEnd s.pos then + else if ictx.atEnd s.pos then let act := elabToml ⟨s.stxStack.back⟩ match (← act.run {fileName := ictx.fileName, fileMap := ictx.fileMap} {env} |>.toBaseIO) with | .ok (t, s) => diff --git a/src/lake/Lake/Toml/ParserUtil.lean b/src/lake/Lake/Toml/ParserUtil.lean index 74be8bdf32..729a55f47c 100644 --- a/src/lake/Lake/Toml/ParserUtil.lean +++ b/src/lake/Lake/Toml/ParserUtil.lean @@ -59,12 +59,12 @@ public def mkUnexpectedCharError (s : ParserState) (c : Char) (expected : List @[inline] public def satisfyFn (p : Char → Bool) (expected : List String := []) : ParserFn := fun c s => let i := s.pos - if h : c.input.atEnd i then + if h : c.atEnd i then s.mkEOIError expected else - let curr := c.input.get' i h + let curr := c.get' i h if p curr then - s.next' c.input i h + s.next' c i h else mkUnexpectedCharError s curr expected @@ -98,26 +98,24 @@ mutual public partial def sepByChar1AuxFn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s => let i := s.pos - let input := c.input - if h : input.atEnd i then + if h : c.atEnd i then s else - let curr := input.get' i h + let curr := c.get' i h if p curr then - sepByChar1AuxFn p sep expected c (s.next' input i h) + sepByChar1AuxFn p sep expected c (s.next' c i h) else if curr == sep then - sepByChar1Fn p sep expected c (s.next' input i h) + sepByChar1Fn p sep expected c (s.next' c i h) else s public partial def sepByChar1Fn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s => let i := s.pos - let input := c.input - if h : input.atEnd i then + if h : c.atEnd i then s else - let curr := input.get' i h - let s := s.next' input i h + let curr := c.get' i h + let s := s.next' c i h if p curr then sepByChar1AuxFn p sep expected c s else if curr == sep then @@ -129,13 +127,12 @@ end /-- Push a new atom onto the syntax stack. -/ public def pushAtom (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s => - let input := c.input let stopPos := s.pos - let leading := mkEmptySubstringAt input startPos - let val := input.extract startPos stopPos + let leading := c.mkEmptySubstringAt startPos + let val := c.extract startPos stopPos let s := trailingFn c s let stopPos' := s.pos - let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring } + let trailing := c.substring (startPos := stopPos) (stopPos := stopPos') let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val s.pushSyntax atom @@ -188,13 +185,12 @@ public def strAtom.parenthesizer (_ : String) (_ : List String) (_ : ParserFn) /-- Push `(Syntax.node kind )` onto the syntax stack. -/ public def pushLit (kind : SyntaxNodeKind) (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s => - let input := c.input let stopPos := s.pos - let leading := mkEmptySubstringAt input startPos - let val := input.extract startPos stopPos + let leading := c.mkEmptySubstringAt startPos + let val := c.extract startPos stopPos let s := trailingFn c s let wsStopPos := s.pos - let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring } + let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos) let info := SourceInfo.original leading startPos trailing stopPos s.pushSyntax (Syntax.mkLit kind val info) diff --git a/tests/lean/run/parseEnd.lean b/tests/lean/run/parseEnd.lean new file mode 100644 index 0000000000..946770d828 --- /dev/null +++ b/tests/lean/run/parseEnd.lean @@ -0,0 +1,59 @@ +import Lean.Elab.Command + +open Lean Elab Command Parser + +declare_syntax_cat balanced +syntax "!" : balanced +syntax "(" balanced* ")" : balanced + + +elab d:docComment "test" : command => do + let some ⟨pos, endPos⟩ := d.raw[1].getRange? (canonicalOnly := true) + | throwErrorAt d "Docstring doesn't have a canonical position" + let p := Parser.categoryParser `balanced 0 + let p := andthenFn whitespace p.fn + let text ← getFileMap + let input := text.source + + let endPos := input.prev <| input.prev endPos + + if h : endPos ≤ input.endPos then + let ictx := mkInputContext input (← getFileName) (endPos := endPos) (endPos_valid := h) + let env ← getEnv + let s := { mkParserState input with pos } + let s := p.run ictx { env, options := {} } (getTokenTable env) s + if !s.allErrors.isEmpty then + for (pos, _, err) in s.allErrors do + logMessage { + fileName := (← getFileName ) + pos := text.toPosition pos + endPos := some (text.toPosition endPos) + data := s!"{err.toString}" + } + else if ictx.atEnd s.pos then + logInfo s.stxStack.back + else + let pos := s.pos + logMessage { + fileName := (← getFileName ) + pos := text.toPosition pos + endPos := some (text.toPosition endPos) + data := s!"expected end of input" + } + else + throwError "Out of bounds" + + +/-- info: (!!(!)) -/ +#guard_msgs in +/-- +( ! ! (!) ) +-/ +test + +/-- error: unexpected end of input; expected ')' -/ +#guard_msgs in +/-- +( +-/ +test