feat: add a stop position field to the parser (#10043)

This PR allows Lean's parser to run with a final position prior to the
end of the string, so it can be invoked on a sub-region of the input.

This has applications in Verso proper, which parses Lean syntax in
contexts such as code blocks and docstrings, and it is a prerequisite to
parsing the contents of Lean docstrings.
This commit is contained in:
David Thrane Christiansen 2025-08-23 20:29:51 +02:00 committed by GitHub
parent 0d9b7fb6b8
commit c9727c2d19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 521 additions and 372 deletions

View file

@ -166,14 +166,16 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0
/-- Find the longest _key_ in the trie that is contained in the given string `s` at position `i`,
and return the associated value. -/
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α :=
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos)
(endByte := s.utf8ByteSize)
(endByte_valid : endByte ≤ s.utf8ByteSize := by simp) : Option α :=
let rec loop
| leaf v, _, res =>
if v.isSome then v else res
| node1 v c' t', i, res =>
let res := if v.isSome then v else res
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte i h
if h : i < endByte then
let c := s.getUtf8Byte i (by omega)
if c == c'
then loop t' (i + 1) res
else res
@ -181,8 +183,8 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α
res
| node v cs ts, i, res =>
let res := if v.isSome then v else res
if h : i < s.utf8ByteSize then
let c := s.getUtf8Byte i h
if h : i < endByte then
let c := s.getUtf8Byte i (by omega)
match cs.findIdx? (· == c) with
| none => res
| some idx => loop ts[idx]! (i + 1) res

View file

@ -260,7 +260,7 @@ ruled out.
def LeanProcessingM.run (act : LeanProcessingM α) (oldInputCtx? : Option InputContext) :
ProcessingM α := do
-- compute position of syntactic change once
let firstDiffPos? := oldInputCtx?.map (·.input.firstDiffPos (← read).input)
let firstDiffPos? := oldInputCtx?.map (·.inputString.firstDiffPos (← read).inputString)
ReaderT.adapt ({ · with firstDiffPos? }) act
/--
@ -392,7 +392,7 @@ where
-- they are passed separately from `old`
if let some oldSuccess := old.result? then
-- make sure to update ranges of all reused tasks
let progressRange? := .some ⟨newParserState.pos, ctx.input.endPos⟩
let progressRange? := .some ⟨newParserState.pos, ctx.endPos⟩
return {
ictx
stx := newStx
@ -478,7 +478,7 @@ where
processHeader (stx : HeaderSyntax) (parserState : Parser.ModuleParserState) :
LeanProcessingM (SnapshotTask HeaderProcessedSnapshot) := do
let ctx ← read
SnapshotTask.ofIO none none (.some ⟨0, ctx.input.endPos⟩) <|
SnapshotTask.ofIO none none (.some ⟨0, ctx.endPos⟩) <|
ReaderT.run (r := ctx) <| -- re-enter reader in new task
withHeaderExceptions (α := HeaderProcessedSnapshot) ({ · with result? := none, metaSnap := default }) do
let setup ← match (← setupImports stx) with
@ -569,7 +569,7 @@ where
parseCmd oldNext newParserState oldResult.cmdState newProm sync cancelTk ctx
prom.resolve <| { old with nextCmdSnap? := some {
stx? := none
reportingRange := .some ⟨newParserState.pos, ctx.input.endPos⟩
reportingRange := .some ⟨newParserState.pos, ctx.endPos⟩
task := newProm.result!
cancelTk? := cancelTk
} }
@ -651,7 +651,7 @@ where
else some <$> IO.Promise.new
let nextCmdSnap? := next?.map ({
stx? := none
reportingRange := .some ⟨parserState.pos, ctx.input.endPos⟩
reportingRange := .some ⟨parserState.pos, ctx.endPos⟩
cancelTk? := parseCancelTk
task := ·.result!
})

View file

@ -143,7 +143,7 @@ def errorAtSavedPosFn (msg : String) (delta : Bool) : ParserFn := fun c s =>
match c.savedPos? with
| none => s
| some pos =>
let pos := if delta then c.input.next pos else pos
let pos := if delta then c.next pos else pos
s.mkUnexpectedErrorAt msg pos
/-- Generate an error at the position saved with the `withPosition` combinator.
@ -510,15 +510,15 @@ def many1Unbox (p : Parser) : Parser :=
partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then s.mkEOIError
else if p (c.input.get' i h) then s.next' c.input i h
if h : c.atEnd i then s.mkEOIError
else if p (c.get' i h) then s.next' c i h
else s.mkUnexpectedError errorMsg
partial def takeUntilFn (p : Char → Bool) : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then s
else if p (c.input.get' i h) then s
else takeUntilFn p c (s.next' c.input i h)
if h : c.atEnd i then s
else if p (c.get' i h) then s
else takeUntilFn p c (s.next' c i h)
def takeWhileFn (p : Char → Bool) : ParserFn :=
takeUntilFn (fun c => !p c)
@ -528,26 +528,25 @@ def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
variable (pushMissingOnError : Bool) in
partial def finishCommentBlock (nesting : Nat) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then eoi s
let i := s.pos
if h : c.atEnd i then eoi s
else
let curr := input.get' i h
let i := input.next' i h
let curr := c.get' i h
let i := c.next' i h
if curr == '-' then
if h : input.atEnd i then eoi s
if h : c.atEnd i then eoi s
else
let curr := input.get' i h
let curr := c.get' i h
if curr == '/' then -- "-/" end of comment
if nesting == 1 then s.next' input i h
else finishCommentBlock (nesting-1) c (s.next' input i h)
if nesting == 1 then s.next' c i h
else finishCommentBlock (nesting-1) c (s.next' c i h)
else
finishCommentBlock nesting c (s.setPos i)
else if curr == '/' then
if h : input.atEnd i then eoi s
if h : c.atEnd i then eoi s
else
let curr := input.get' i h
if curr == '-' then finishCommentBlock (nesting+1) c (s.next' input i h)
let curr := c.get' i h
if curr == '-' then finishCommentBlock (nesting+1) c (s.next' c i h)
else finishCommentBlock nesting c (s.setPos i)
else finishCommentBlock nesting c (s.setPos i)
where
@ -555,49 +554,46 @@ where
/-- Consume whitespace and comments -/
partial def whitespace : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s
let i := s.pos
if h : c.atEnd i then s
else
let curr := input.get' i h
let curr := c.get' i h
if curr == '\t' then
s.mkUnexpectedError (pushMissing := false) "tabs are not allowed; please configure your editor to expand them"
else if curr == '\r' then
s.mkUnexpectedError (pushMissing := false) "isolated carriage returns are not allowed"
else if curr.isWhitespace then whitespace c (s.next' input i h)
else if curr.isWhitespace then whitespace c (s.next' c i h)
else if curr == '-' then
let i := input.next' i h
let curr := input.get i
if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next input i)
let i := c.next' i h
let curr := c.get i
if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next c i)
else s
else if curr == '/' then
let i := input.next' i h
let curr := input.get i
let i := c.next' i h
let curr := c.get i
if curr == '-' then
let i := input.next i
let curr := input.get i
let i := c.next i
let curr := c.get i
if curr == '-' || curr == '!' then s -- "/--" and "/-!" doc comment are actual tokens
else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next input i)
else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next c i)
else s
else s
def mkEmptySubstringAt (s : String) (p : String.Pos) : Substring := {
str := s, startPos := p, stopPos := p
}
def ParserContext.mkEmptySubstringAt (c : ParserContext) (p : String.Pos) : Substring :=
c.substring p p
private def rawAux (startPos : String.Pos) (trailingWs : Bool) : ParserFn := fun c s =>
let input := c.input
let stopPos := s.pos
let leading := mkEmptySubstringAt input startPos
let val := input.extract startPos stopPos
let leading := c.mkEmptySubstringAt startPos
let val := c.extract startPos stopPos
if trailingWs then
let s := whitespace c s
let stopPos' := s.pos
let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring }
let trailing := c.substring (startPos := stopPos) (stopPos := stopPos')
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
s.pushSyntax atom
else
let trailing := mkEmptySubstringAt input stopPos
let trailing := c.mkEmptySubstringAt stopPos
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
s.pushSyntax atom
@ -615,12 +611,11 @@ def rawCh (c : Char) (trailingWs := false) : Parser := {
}
def hexDigitFn : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s.mkEOIError
let i := s.pos
if h : c.atEnd i then s.mkEOIError
else
let curr := input.get' i h
let i := input.next' i h
let curr := c.get' i h
let i := c.next' i h
if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i
else s.mkUnexpectedError "invalid hexadecimal numeral"
@ -630,17 +625,17 @@ Raises an error if the whitespace does not contain exactly one newline character
-/
partial def stringGapFn (seenNewline : Bool) : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline
if h : c.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline
else
let curr := c.input.get' i h
let curr := c.get' i h
if curr == '\n' then
if seenNewline then
-- Having more than one newline in a string gap is visually confusing
s.mkUnexpectedError "unexpected additional newline in string gap"
else
stringGapFn true c (s.next' c.input i h)
stringGapFn true c (s.next' c i h)
else if curr.isWhitespace then
stringGapFn seenNewline c (s.next' c.input i h)
stringGapFn seenNewline c (s.next' c i h)
else if seenNewline then
s
else
@ -653,17 +648,16 @@ Parses a string quotation after a `\`.
in particular `"\" newline whitespace*` gaps.
-/
def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s.mkEOIError
let i := s.pos
if h : c.atEnd i then s.mkEOIError
else
let curr := input.get' i h
let curr := c.get' i h
if isQuotable curr then
s.next' input i h
s.next' c i h
else if curr == 'x' then
andthenFn hexDigitFn hexDigitFn c (s.next' input i h)
andthenFn hexDigitFn hexDigitFn c (s.next' c i h)
else if curr == 'u' then
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h)
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' c i h)
else if inString && curr == '\n' then
stringGapFn false c s
else
@ -686,39 +680,36 @@ def quotedStringFn : ParserFn :=
def mkNodeToken (n : SyntaxNodeKind) (startPos : String.Pos) : ParserFn := fun c s => Id.run do
if s.hasError then
return s
let input := c.input
let stopPos := s.pos
let leading := mkEmptySubstringAt input startPos
let val := input.extract startPos stopPos
let leading := c.mkEmptySubstringAt startPos
let val := c.extract startPos stopPos
let s := whitespace c s
let wsStopPos := s.pos
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
let info := SourceInfo.original leading startPos trailing stopPos
s.pushSyntax (Syntax.mkLit n val info)
def charLitFnAux (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s.mkEOIError
let i := s.pos
if h : c.atEnd i then s.mkEOIError
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
let curr := c.get' i h
let s := s.setPos (c.next' i h)
let s := if curr == '\\' then quotedCharFn c s else s
if s.hasError then s
else
let i := s.pos
let curr := input.get i
let s := s.setPos (input.next i)
let curr := c.get i
let s := s.setPos (c.next i)
if curr == '\'' then mkNodeToken charLitKind startPos c s
else s.mkUnexpectedError "missing end of character literal"
partial def strLitFnAux (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos
let i := s.pos
if h : c.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
let curr := c.get' i h
let s := s.setPos (c.next' i h)
if curr == '\"' then
mkNodeToken strLitKind startPos c s
else if curr == '\\' then andthenFn quotedStringFn (strLitFnAux startPos) c s
@ -730,12 +721,12 @@ If we are looking at a valid start to a raw string (`r##...#"`),
returns true.
We assume `i` begins at the position immediately after `r`.
-/
partial def isRawStrLitStart (input : String) (i : String.Pos) : Bool :=
if h : input.atEnd i then false
partial def isRawStrLitStart (c : ParserContext) (i : String.Pos) : Bool :=
if h : c.atEnd i then false
else
let curr := input.get' i h
let curr := c.get' i h
if curr == '#' then
isRawStrLitStart input (input.next' i h)
isRawStrLitStart c (c.next' i h)
else
curr == '"'
@ -755,12 +746,11 @@ where
The `num` variable counts the number of `#`s after the `r`.
-/
initState (num : Nat) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then errorUnterminated s
let i := s.pos
if h : c.atEnd i then errorUnterminated s
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
let curr := c.get' i h
let s := s.setPos (c.next' i h)
if curr == '#' then
initState (num + 1) c s
else if curr == '"' then
@ -773,12 +763,11 @@ where
the raw string literal, we switch to `closingState`.
-/
normalState (num : Nat) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then errorUnterminated s
let i := s.pos
if h : c.atEnd i then errorUnterminated s
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
let curr := c.get' i h
let s := s.setPos (c.next' i h)
if curr == '\"' then
if num == 0 then
mkNodeToken strLitKind startPos c s
@ -792,12 +781,11 @@ where
Note: `num > 0` since the `num = 0` case is entirely handled by `normalState`.
-/
closingState (num : Nat) (closingNum : Nat) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then errorUnterminated s
let i := s.pos
if h : c.atEnd i then errorUnterminated s
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
let curr := c.get' i h
let s := s.setPos (c.next' i h)
if curr == '#' then
if closingNum + 1 == num then
mkNodeToken strLitKind startPos c s
@ -815,30 +803,28 @@ Note: this does not report that it is expecting `_` if we reach EOI or an unexpe
Rationale: this error happens if there is already a `_`, and while sequences of `_` are allowed, it's a bit perverse to suggest extending the sequence.
-/
partial def takeDigitsFn (isDigit : Char → Bool) (expecting : String) (needDigit : Bool) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then
let i := s.pos
if h : c.atEnd i then
if needDigit then
s.mkEOIError [expecting]
else
s
else
let curr := input.get' i h
if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c.input i h)
else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c.input i h)
let curr := c.get' i h
if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c i h)
else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c i h)
else if needDigit then s.mkUnexpectedError "unexpected character" (expected := [expecting])
else s
def decimalNumberFn (startPos : String.Pos) (c : ParserContext) : ParserState → ParserState := fun s =>
let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s
let input := c.input
let i := s.pos
if h : input.atEnd i then
let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s
let i := s.pos
if h : c.atEnd i then
mkNodeToken numLitKind startPos c s
else
let curr := input.get' i h
let j := input.next i
if ∃ hj : ¬ input.atEnd j, curr = '.' && input.get' j hj = '.' then
let curr := c.get' i h
let j := c.next i
if ∃ hj : ¬ c.atEnd j, curr = '.' && c.get' j hj = '.' then
mkNodeToken numLitKind startPos c s
else if curr == '.' || curr == 'e' || curr == 'E' then
parseScientific s
@ -851,12 +837,11 @@ where
mkNodeToken scientificLitKind startPos c s
parseOptDot s :=
let input := c.input
let i := s.pos
let curr := input.get i
let curr := c.get i
if curr == '.' then
let i := input.next i
let curr := input.get i
let i := c.next i
let curr := c.get i
if curr.isDigit then
takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i)
else
@ -865,13 +850,12 @@ where
s
parseOptExp s :=
let input := c.input
let i := s.pos
let curr := input.get i
let curr := c.get i
if curr == 'e' || curr == 'E' then
let i := input.next i
let i := if input.get i == '-' || input.get i == '+' then input.next i else i
let curr := input.get i
let i := c.next i
let i := if c.get i == '-' || c.get i == '+' then c.next i else i
let curr := c.get i
if curr.isDigit then
takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i)
else
@ -892,36 +876,35 @@ def hexNumberFn (startPos : String.Pos) : ParserFn := fun c s =>
mkNodeToken numLitKind startPos c s
def numberFnAux : ParserFn := fun c s =>
let input := c.input
let startPos := s.pos
if h : input.atEnd startPos then s.mkEOIError
if h : c.atEnd startPos then s.mkEOIError
else
let curr := input.get' startPos h
let curr := c.get' startPos h
if curr == '0' then
let i := input.next' startPos h
let curr := input.get i
let i := c.next' startPos h
let curr := c.get i
if curr == 'b' || curr == 'B' then
binNumberFn startPos c (s.next input i)
binNumberFn startPos c (s.next c i)
else if curr == 'o' || curr == 'O' then
octalNumberFn startPos c (s.next input i)
octalNumberFn startPos c (s.next c i)
else if curr == 'x' || curr == 'X' then
hexNumberFn startPos c (s.next input i)
hexNumberFn startPos c (s.next c i)
else
decimalNumberFn startPos c (s.setPos i)
else if curr.isDigit then
decimalNumberFn startPos c (s.next input startPos)
decimalNumberFn startPos c (s.next c startPos)
else
s.mkError "numeral"
def isIdCont : String → ParserState → Bool := fun input s =>
def isIdCont : ParserContext → ParserState → Bool := fun c s =>
let i := s.pos
let curr := input.get i
let curr := c.get i
if curr == '.' then
let i := input.next i
if input.atEnd i then
let i := c.next i
if c.atEnd i then
false
else
let curr := input.get i
let curr := c.get i
isIdFirst curr || isIdBeginEscape curr
else
false
@ -942,13 +925,12 @@ def mkTokenAndFixPos (startPos : String.Pos) (tk : Option Token) : ParserFn := f
if c.forbiddenTk? == some tk then
s.mkErrorAt "forbidden token" startPos
else
let input := c.input
let leading := mkEmptySubstringAt input startPos
let leading := c.mkEmptySubstringAt startPos
let stopPos := startPos + tk
let s := s.setPos stopPos
let s := whitespace c s
let wsStopPos := s.pos
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
let atom := mkAtom (SourceInfo.original leading startPos trailing stopPos) tk
s.pushSyntax atom
@ -957,45 +939,43 @@ def mkIdResult (startPos : String.Pos) (tk : Option Token) (val : Name) : Parser
if isToken startPos stopPos tk then
mkTokenAndFixPos startPos tk c s
else
let input := c.input
let rawVal := { str := input, startPos := startPos, stopPos := stopPos : Substring }
let rawVal := c.substring startPos stopPos
let s := whitespace c s
let trailingStopPos := s.pos
let leading := mkEmptySubstringAt input startPos
let trailing := { str := input, startPos := stopPos, stopPos := trailingStopPos : Substring }
let leading := c.mkEmptySubstringAt startPos
let trailing := c.substring (startPos := stopPos) (stopPos := trailingStopPos)
let info := SourceInfo.original leading startPos trailing stopPos
let atom := mkIdent info rawVal val
s.pushSyntax atom
partial def identFnAux (startPos : String.Pos) (tk : Option Token) (r : Name) : ParserFn :=
let rec parse (r : Name) (c s) :=
let input := c.input
let i := s.pos
if h : input.atEnd i then
let i := s.pos
if h : c.atEnd i then
s.mkEOIError
else
let curr := input.get' i h
let curr := c.get' i h
if isIdBeginEscape curr then
let startPart := input.next' i h
let startPart := c.next' i h
let s := takeUntilFn isIdEndEscape c (s.setPos startPart)
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkUnexpectedErrorAt "unterminated identifier escape" startPart
else
let stopPart := s.pos
let s := s.next' c.input s.pos h
let r := .str r (input.extract startPart stopPart)
if isIdCont input s then
let s := s.next input s.pos
let s := s.next' c s.pos h
let r := .str r (c.extract startPart stopPart)
if isIdCont c s then
let s := s.next c s.pos
parse r c s
else
mkIdResult startPos tk r c s
else if isIdFirst curr then
let startPart := i
let s := takeWhileFn isIdRest c (s.next input i)
let s := takeWhileFn isIdRest c (s.next c i)
let stopPart := s.pos
let r := .str r (input.extract startPart stopPart)
if isIdCont input s then
let s := s.next input s.pos
let r := .str r (c.extract startPart stopPart)
if isIdCont c s then
let s := s.next c s.pos
parse r c s
else
mkIdResult startPos tk r c s
@ -1007,8 +987,7 @@ private def isIdFirstOrBeginEscape (c : Char) : Bool :=
isIdFirst c || isIdBeginEscape c
private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
let s := identFnAux startPos none .anonymous c (s.next input startPos)
let s := identFnAux startPos none .anonymous c (s.next c startPos)
if s.hasError then
s
else
@ -1020,21 +999,23 @@ private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s =>
| _ => s.mkError "invalid Name literal"
private def tokenFnAux : ParserFn := fun c s =>
let input := c.input
let i := s.pos
let curr := input.get i
let curr := c.get i
if curr == '\"' then
strLitFnAux i c (s.next input i)
else if curr == '\'' && getNext input i != '\'' then
charLitFnAux i c (s.next input i)
strLitFnAux i c (s.next c i)
else if curr == '\'' && c.getNext i != '\'' then
charLitFnAux i c (s.next c i)
else if curr.isDigit then
numberFnAux c s
else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then
else if curr == '`' && isIdFirstOrBeginEscape (c.getNext i) then
nameLitAux i c s
else if curr == 'r' && isRawStrLitStart input (input.next i) then
rawStrLitFnAux i c (s.next input i)
else if curr == 'r' && isRawStrLitStart c (c.next i) then
rawStrLitFnAux i c (s.next c i)
else
let tk := c.tokens.matchPrefix input i
let tk := c.tokens.matchPrefix c.inputString i c.endPos.byteIdx <| by
let ⟨⟨{inputString, endPos := ⟨endPos⟩, endPos_valid, ..}, _, _, _⟩⟩ := c
rw [String.endPos] at endPos_valid
omega
identFnAux i tk .anonymous c s
private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState :=
@ -1048,9 +1029,8 @@ private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserS
| other => other
def tokenFn (expected : List String := []) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if input.atEnd i then s.mkEOIError expected
let i := s.pos
if c.atEnd i then s.mkEOIError expected
else
let tkc := s.cache.tokenCache
if tkc.startPos == i then
@ -1078,9 +1058,8 @@ def peekToken (c : ParserContext) (s : ParserState) : ParserState × Except Pars
/-- Treat keywords as identifiers. -/
def rawIdentFn : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if input.atEnd i then s.mkEOIError
let i := s.pos
if c.atEnd i then s.mkEOIError
else identFnAux i none .anonymous c s
def satisfySymbolFn (p : String → Bool) (expected : List String) : ParserFn := fun c s => Id.run do
@ -1158,10 +1137,9 @@ partial def strAux (sym : String) (errorMsg : String) (j : String.Pos) :ParserFn
if h₁ : sym.atEnd j then s
else
let i := s.pos
let input := c.input
if h₂ : input.atEnd i then s.mkError errorMsg
else if sym.get' j h₁ != input.get' i h₂ then s.mkError errorMsg
else parse (sym.next' j h₁) c (s.next' input i h₂)
if h₂ : c.atEnd i then s.mkError errorMsg
else if sym.get' j h₁ != c.get' i h₂ then s.mkError errorMsg
else parse (sym.next' j h₁) c (s.next' c i h₂)
parse j
def checkTailWs (prev : Syntax) : Bool :=
@ -1306,7 +1284,6 @@ def identEq (id : Name) : Parser := {
}
def hygieneInfoFn : ParserFn := fun c s => Id.run do
let input := c.input
let finish pos str trailing s :=
-- Builds an actual hygieneInfo node from empty string `str` and trailing whitespace `trailing`.
let info := SourceInfo.original str pos trailing pos
@ -1321,13 +1298,13 @@ def hygieneInfoFn : ParserFn := fun c s => Id.run do
if !s.stxStack.isEmpty then
let prev := s.stxStack.back
if let .original leading pos trailing endPos := prev.getTailInfo then
let str := mkEmptySubstringAt input endPos
let str := c.mkEmptySubstringAt endPos
-- steal the trailing whitespace from the last node and use it for this node
let s := s.popSyntax.pushSyntax <| prev.setTailInfo (.original leading pos str endPos)
return finish endPos str trailing s
-- The stack can be empty if this is either the first token, or if we are in a fresh cache.
-- In that case we just put the hygieneInfo at the current location.
let str := mkEmptySubstringAt input s.pos
let str := c.mkEmptySubstringAt s.pos
finish s.pos str str s
def hygieneInfoNoAntiquot : Parser := {
@ -1577,7 +1554,7 @@ This parser has the same arity as `p` - it just forwards the results of `p`. -/
def eoiFn : ParserFn := fun c s =>
let i := s.pos
if c.input.atEnd i then s
if c.atEnd i then s
else s.mkError "expected end of file"
def eoi : Parser := {
@ -1736,11 +1713,10 @@ def termParser (prec : Nat := 0) : Parser :=
fn := fun c s =>
let prev := s.stxStack.back
if checkTailNoWs prev then
let input := c.input
let i := s.pos
if h : input.atEnd i then s
let i := s.pos
if h : c.atEnd i then s
else
let curr := input.get' i h
let curr := c.get' i h
if curr == ':' then
s.mkUnexpectedError "unexpected ':'"
else s
@ -1775,7 +1751,7 @@ def tokenAntiquotFn : ParserFn := fun c s => Id.run do
def tokenWithAntiquot : Parser → Parser := withFn fun f c s =>
let s := f c s
-- fast check that is false in most cases
if c.input.get s.pos == '%' then
if c.get s.pos == '%' then
tokenAntiquotFn c s
else
s
@ -1811,7 +1787,7 @@ def unicodeSymbol (sym asciiSym : String) : Parser :=
def withAntiquotFn (antiquotP p : ParserFn) (isCatAntiquot := false) : ParserFn := fun c s =>
-- fast check that is false in most cases
if c.input.get s.pos == '$' then
if c.get s.pos == '$' then
-- Do not allow antiquotation choice nodes here as `antiquotP` is the strictly more general
-- antiquotation than any in `p`.
-- If it is a category antiquotation, do not backtrack into the category at all as that would
@ -1954,7 +1930,7 @@ def prattParser (kind : Name) (tables : PrattParsingTables) (behavior : LeadingI
def fieldIdxFn : ParserFn := fun c s =>
let initStackSz := s.stackSize
let iniPos := s.pos
let curr := c.input.get iniPos
let curr := c.get iniPos
if curr.isDigit && curr != '0' then
let s := takeWhileFn (fun c => c.isDigit) c s
mkNodeToken fieldIdxKind iniPos c s

View file

@ -443,12 +443,27 @@ def getSyntaxNodeKinds (env : Environment) : List SyntaxNodeKind :=
def getTokenTable (env : Environment) : TokenTable :=
(parserExtension.getState env).tokens
set_option linter.unusedVariables.funArgs false in
-- Note: `crlfToLf` preserves logical line and column numbers for each character.
def mkInputContext (input : String) (fileName : String) (normalizeLineEndings := true) : InputContext :=
let input' := if normalizeLineEndings then input.crlfToLf else input
{ input := input',
fileName := fileName,
fileMap := input'.toFileMap }
def mkInputContext (input : String) (fileName : String)
(normalizeLineEndings := true)
(endPos := input.endPos)
(endPos_valid : endPos ≤ input.endPos := by simp) :
InputContext :=
let text := FileMap.ofString input
let next := if normalizeLineEndings then
-- Convert the stop position to a line/column position so crlf translation doesn't invalidate it
let endPos' := text.toPosition endPos
let text := FileMap.ofString text.source.crlfToLf
(text, text.ofPosition endPos')
else
(text, endPos)
let text := next.1
let endPos' := next.2
if h : endPos' ≤ text.source.endPos then
.mk text.source fileName (fileMap := text) (endPos := endPos') (endPos_valid := h)
else
.mk text.source fileName (fileMap := text)
def mkParserState (input : String) : ParserState :=
{ cache := initCacheForInput input }
@ -460,7 +475,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil
let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input)
if !s.allErrors.isEmpty then
Except.error (s.toErrorMsg ictx)
else if ictx.input.atEnd s.pos then
else if ictx.atEnd s.pos then
Except.ok s.stxStack.back
else
Except.error ((s.mkError "end of input").toErrorMsg ictx)

View file

@ -83,7 +83,7 @@ def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × Modul
let dummyEnv ← mkEmptyEnvironment
let p := andthenFn whitespace Module.header.fn
let tokens := Module.updateTokens (getTokenTable dummyEnv)
let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input)
let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.inputString)
let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back
let mut messages : MessageLog := {}
for (pos, stk, err) in s.allErrors do
@ -125,7 +125,7 @@ def isTerminalCommand (s : Syntax) : Bool :=
s.isOfKind ``Command.exit || s.isOfKind ``Command.import || s.isOfKind ``Command.eoi
private def consumeInput (inputCtx : InputContext) (pmctx : ParserModuleContext) (pos : String.Pos) : String.Pos :=
let s : ParserState := { cache := initCacheForInput inputCtx.input, pos := pos }
let s : ParserState := { cache := initCacheForInput inputCtx.inputString, pos := pos }
let s := tokenFn [] |>.run inputCtx pmctx (getTokenTable pmctx.env) s
match s.errorMsg with
| some _ => pos + ' '
@ -140,12 +140,12 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
let mut messages := messages
let mut stx := Syntax.missing -- will always be assigned below
repeat
if inputCtx.input.atEnd pos then
if inputCtx.atEnd pos then
stx := mkEOI pos
break
let pos' := pos
let p := andthenFn whitespace topLevelCommandParserFn
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos }
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.inputString, pos }
-- save errors from sub-recoveries
for (rpos, rstk, recovered) in s.recoveredErrors do
messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered

View file

@ -15,16 +15,15 @@ def isQuotableCharForStrInterpolant (c : Char) : Bool :=
c == '{' || isQuotableCharDefault c
partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
let input := c.input
let stackSize := s.stackSize
let rec parse (startPos : String.Pos) (c : ParserContext) (s : ParserState) : ParserState :=
let i := s.pos
if input.atEnd i then
let i := s.pos
if c.atEnd i then
let s := s.mkError "unterminated string literal"
s.mkNode interpolatedStrKind stackSize
else
let curr := input.get i
let s := s.setPos (input.next i)
let curr := c.get i
let s := s.setPos (c.next i)
if curr == '\"' then
let s := mkNodeToken interpolatedStrLitKind startPos c s
s.mkNode interpolatedStrKind stackSize
@ -36,9 +35,9 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
if s.hasError then s
else
let i := s.pos
let curr := input.get i
let curr := c.get i
if curr == '}' then
let s := s.setPos (input.next i)
let s := s.setPos (c.next i)
parse i c s
else
let s := s.mkError "'}'"
@ -46,14 +45,14 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
else
parse startPos c s
let startPos := s.pos
if input.atEnd startPos then
if c.atEnd startPos then
s.mkEOIError
else
let curr := input.get s.pos;
let curr := c.get s.pos;
if curr != '\"' then
s.mkError "interpolated string"
else
let s := s.next input startPos
let s := s.next c startPos
parse startPos c s
@[inline] def interpolatedStrNoAntiquot (p : Parser) : Parser := {

View file

@ -48,15 +48,127 @@ def SyntaxNodeKindSet.insert (s : SyntaxNodeKindSet) (k : SyntaxNodeKind) : Synt
Input string and related data. Recall that the `FileMap` is a helper structure for mapping
`String.Pos` in the input string to line/column information. -/
structure InputContext where
input : String
mk' ::
inputString : String
fileName : String
fileMap : FileMap
endPos : String.Pos := input.endPos
endPos_valid : endPos ≤ input.endPos := by simp
fileMap : FileMap
endPos : String.Pos := inputString.endPos
endPos_valid : endPos ≤ inputString.endPos := by simp
instance : Inhabited InputContext where
default := ⟨"", default, default, "".endPos, String.Pos.mk_le_mk.mpr (Nat.le_refl _)⟩
namespace InputContext
def mk
(input fileName : String)
(endPos : String.Pos := input.endPos)
(endPos_valid : endPos ≤ input.endPos := by simp)
(fileMap : FileMap := FileMap.ofString input) :
InputContext where
inputString := input
endPos := endPos
endPos_valid := endPos_valid
fileName := fileName
fileMap := fileMap
@[deprecated "Call `InputContext.get`, `InputContext.get'`, `InputContext.next'`, and `InputContext.atEnd` directly" (since := "2025-08-21")]
def input (c : InputContext) : String := c.inputString.extract 0 c.endPos
/--
Returns `true` if a specified byte position is greater than or equal to the position which points to
the end of the input string. Otherwise, returns `false`.
-/
def atEnd (c : InputContext) (p : String.Pos) : Bool := p ≥ c.endPos
/--
Returns the character at position `p` of the input string. If `p` is not a valid position, returns
the fallback value `(default : Char)`, which is `'A'`, but does not panic.
-/
@[inline]
def get (c : InputContext) (p : String.Pos) : Char := c.inputString.get p
theorem not_atEnd_inputString {c : InputContext} {p : String.Pos} :
¬c.atEnd p → ¬c.inputString.atEnd p := by
intro h
let {inputString, endPos := ⟨e⟩, endPos_valid, ..} := c
cases p
simp only [String.endPos, String.Pos.mk_le_mk] at endPos_valid
simp_all [String.atEnd, InputContext.atEnd, String.Pos.mk_le_mk]
exact Nat.lt_of_lt_of_le h endPos_valid
/--
Returns the character at position `p` of the input string. Returns `(default : Char)`, which is
`'A'`, if `p` is not a valid position.
Requires evidence, `h`, that `p` is within bounds instead of performing a run-time bounds check as
in `InputContext.get`.
A typical pattern combines `get'` with a dependent `if`-expression to avoid the overhead of an
additional bounds check. For example:
```
def getInBounds? (s : String) (p : String.Pos) : Option Char :=
if h : s.atEnd p then none else some (s.get' p h)
```
Even with evidence of `¬ s.atEnd p`, `p` may be invalid if a byte index points into the middle of a
multi-byte UTF-8 character.
-/
@[inline]
def get' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : Char :=
c.inputString.get' p (c.not_atEnd_inputString h)
/--
Returns the next position in the input string after position `p`. If `p` is not a valid position or
`p = c.endPos`, returns the position one byte after `p`.
A run-time bounds check is performed to determine whether `p` is at the end of the string. If a
bounds check has already been performed, use `InputContext.next'` to avoid a repeated check.
-/
@[inline]
def next (c : InputContext) (p : String.Pos) : String.Pos :=
c.inputString.next p
/--
Returns the next position in the input string after position `p`. The result is unspecified if `p`
is not a valid position.
Requires evidence, `h`, that `p` is within bounds. No run-time bounds check is performed.
-/
@[inline]
def next' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : String.Pos :=
c.inputString.next' p (c.not_atEnd_inputString h)
/--
Creates a new string that consists of the region of the input string delimited by the two positions.
The result is `""` if the start position is greater than or equal to the end position or if the
start position is at the end of the string. If either position is invalid (that is, if either points
at the middle of a multi-byte UTF-8 character) then the result is unspecified.
-/
@[inline]
def extract (c : InputContext) : String.Pos → String.Pos → String :=
c.inputString.extract
/--
Extracts a substring of the input string, bounded by `startPos` and `stopPos`.
-/
@[inline]
def substring (c : InputContext) (startPos stopPos : String.Pos) : Substring :=
{ str := c.inputString, startPos, stopPos := min stopPos c.endPos }
/-- Return character after position `pos` -/
@[inline]
def getNext (input : InputContext) (pos : String.Pos) : Char :=
input.get (input.next pos)
/-- Returns the character position prior to `pos` -/
@[inline]
def prev (c : InputContext) (pos : String.Pos) : String.Pos :=
c.inputString.prev pos
end InputContext
/-- Input context derived from elaboration of previous commands. -/
structure ParserModuleContext where
env : Environment
@ -82,6 +194,16 @@ structure ParserContextCore extends InputContext, ParserModuleContext, Cacheable
/-- Opaque parser context updateable using `adaptCacheableContextFn` and `adaptUncacheableContextFn`. -/
structure ParserContext extends ParserContextCore where private mk ::
instance : Coe ParserContext InputContext := ⟨(·.toInputContext)⟩
/--
Modifies the ending position of a parser context.
-/
def ParserContext.setEndPos (c : ParserContext)
(endPos : String.Pos) (endPos_valid : endPos ≤ c.inputString.endPos) :
ParserContext :=
{ c with endPos, endPos_valid }
structure Error where
/--
If not `missing`, used for lazily calculating `unexpected` message and range in `mkErrorMessage`.
@ -237,11 +359,11 @@ def popSyntax (s : ParserState) : ParserState :=
def shrinkStack (s : ParserState) (iniStackSz : Nat) : ParserState :=
{ s with stxStack := s.stxStack.shrink iniStackSz }
def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState :=
{ s with pos := input.next pos }
def next (s : ParserState) (c : ParserContext) (pos : String.Pos) : ParserState :=
{ s with pos := c.next pos }
def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState :=
{ s with pos := input.next' pos h }
def next' (s : ParserState) (c : ParserContext) (pos : String.Pos) (h : ¬ c.atEnd pos) : ParserState :=
{ s with pos := c.next' pos h }
def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
match s with

View file

@ -352,14 +352,12 @@ def trailingNode.formatter (k : SyntaxNodeKind) (_ _ : Nat) (p : Formatter) : Fo
def parseToken (s : String) : FormatterM ParserState :=
-- include comment tokens, e.g. when formatting `- -0`
return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run {
input := s,
fileName := "",
fileMap := FileMap.ofString ""
} {
env := ← getEnv,
options := ← getOptions
} ((← read).table) (Parser.mkParserState s)
let ictx := .mk s "" (fileMap := FileMap.ofString "")
return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run ictx
{
env := ← getEnv,
options := ← getOptions
} ((← read).table) (Parser.mkParserState s)
def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit := do
if let SourceInfo.original _ _ ss _ := info then

View file

@ -97,10 +97,11 @@ structure DocumentMeta where
deriving Inhabited
/-- Extracts an `InputContext` from `doc`. -/
def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext where
input := doc.text.source
fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString
fileMap := doc.text
def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext :=
.mk
(input := doc.text.source)
(fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString)
(fileMap := doc.text)
/--
Replaces the range `r` (using LSP UTF-16 positions) in `text` (using UTF-8 positions)

View file

@ -39,28 +39,26 @@ public def wsFn : ParserFn :=
/-- Consumes the LF following a CR in a CRLF newline. -/
def crlfAuxFn : ParserFn := fun c s =>
let input := c.input
let errMsg := "invalid newline; no LF after CR"
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkUnexpectedError errMsg
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\n' then
s.next' input s.pos h
s.next' c s.pos h
else
s.mkUnexpectedError errMsg
/-- Consume a newline. -/
public def newlineFn : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError ["newline"]
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\n' then
s.next' input s.pos h
s.next' c s.pos h
else if curr == '\r' then
crlfAuxFn c (s.next' input s.pos h)
crlfAuxFn c (s.next' c s.pos h)
else
mkUnexpectedCharError s curr ["newline"]
@ -76,31 +74,29 @@ public def commentFn : ParserFn :=
/-- Consume optional whitespace (space, tab, or newline). -/
public partial def wsNewlineFn : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr = ' ' || curr = '\t' || curr == '\n' then
wsNewlineFn c (s.next' input s.pos h)
wsNewlineFn c (s.next' c s.pos h)
else if curr == '\r' then
(crlfAuxFn >> wsNewlineFn) c (s.next' input s.pos h)
(crlfAuxFn >> wsNewlineFn) c (s.next' c s.pos h)
else
s
/-- Consume optional sequence of whitespace / newline(s) / comment (s). -/
public partial def trailingFn : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr = ' ' || curr = '\t' || curr == '\n' then
trailingFn c (s.next' input s.pos h)
trailingFn c (s.next' c s.pos h)
else if curr == '\r' then
(crlfAuxFn >> trailingFn) c (s.next' input s.pos h)
(crlfAuxFn >> trailingFn) c (s.next' c s.pos h)
else if curr == '#' then
(commentBodyFn >> trailingFn) c (s.next' input s.pos h)
(commentBodyFn >> trailingFn) c (s.next' c s.pos h)
else
s
@ -114,23 +110,22 @@ public def isEscapeChar (c : Char) : Bool :=
/-- Consumes a TOML string escape sequence after a `\`. -/
def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s =>
let input := c.input
let expected := ["escape sequence"]
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError expected
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
let ifStringGap (p : ParserFn) :=
if stringGap then
p c (s.next' input s.pos h)
p c (s.next' c s.pos h)
else
s.mkUnexpectedError "string gap is forbidden here" expected
if isEscapeChar curr then
s.next' input s.pos h
s.next' c s.pos h
else if curr == 'u' then
repeatFn 4 hexDigitFn c (s.next' input s.pos h)
repeatFn 4 hexDigitFn c (s.next' c s.pos h)
else if curr == 'U' then
repeatFn 8 hexDigitFn c (s.next' input s.pos h)
repeatFn 8 hexDigitFn c (s.next' c s.pos h)
else if curr == ' ' || curr == '\t' then
ifStringGap (wsFn >> newlineFn >> wsNewlineFn)
else if curr == '\n' then
@ -141,50 +136,47 @@ def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s =>
s.mkUnexpectedError "invalid escape sequence"
partial def basicStringAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkUnexpectedErrorAt "unterminated basic string" startPos
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\"' then
s.next' input s.pos h
s.next' c s.pos h
else if curr == '\\' then
(escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' input s.pos h)
(escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' c s.pos h)
else if isControlChar curr then
mkUnexpectedCharError s curr
else
basicStringAuxFn startPos c (s.next' input s.pos h)
basicStringAuxFn startPos c (s.next' c s.pos h)
public def basicStringFn : ParserFn := usePosFn fun startPos =>
chFn '\"' ["basic string"] >> basicStringAuxFn startPos
partial def literalStringAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkUnexpectedErrorAt "unterminated literal string" startPos
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\'' then
s.next' input s.pos h
s.next' c s.pos h
else if isControlChar curr then
mkUnexpectedCharError s curr
else
literalStringAuxFn startPos c (s.next' input s.pos h)
literalStringAuxFn startPos c (s.next' c s.pos h)
public def literalStringFn : ParserFn := usePosFn fun startPos =>
chFn '\'' ["literal string"] >> literalStringAuxFn startPos
partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
if quoteDepth ≥ 3 then
s
else
s.mkUnexpectedErrorAt "unterminated multi-line literal string" startPos
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\'' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
if quoteDepth ≥ 5 then
s.mkUnexpectedError "too many quotes"
else
@ -192,29 +184,28 @@ partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : P
else if quoteDepth ≥ 3 then
s
else if curr == '\n' then
mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h)
mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h)
else if curr == '\r' then
(crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' input s.pos h)
(crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' c s.pos h)
else if isControlChar curr then
mkUnexpectedCharError s curr
else
mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h)
mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h)
public def mlLiteralStringFn : ParserFn := usePosFn fun startPos =>
atomicFn (repeatFn 3 (chFn '\'' ["multi-line literal string"])) >>
mlLiteralStringAuxFn startPos 0
partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
if quoteDepth ≥ 3 then
s
else
s.mkUnexpectedErrorAt "unterminated multi-line basic string" startPos
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '\"' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
if quoteDepth ≥ 5 then
s.mkUnexpectedError "too many quotes"
else
@ -222,15 +213,15 @@ partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : Pars
else if quoteDepth ≥ 3 then
s
else if curr == '\n' then
mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h)
mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h)
else if curr == '\r' then
(crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h)
(crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h)
else if curr == '\\' then
(escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h)
(escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h)
else if isControlChar curr then
mkUnexpectedCharError s curr
else
mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h)
mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h)
public def mlBasicStringFn : ParserFn := usePosFn fun startPos =>
atomicFn (repeatFn 3 (chFn '\"' ["multi-line basic string"])) >>
@ -244,19 +235,18 @@ def hourMinFn : ParserFn :=
digitPairFn ["hour digit"] >> chFn ':' >> digitPairFn ["minute digit"]
def timeTailFn (allowOffset : Bool) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr = '.' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
let s := takeWhile1Fn (·.isDigit) ["millisecond"] c s
if s.hasError then s else
if h : input.atEnd s.pos then s else
timeOffsetFn (input.get' s.pos h) (input.next' s.pos h) c s
if h : c.atEnd s.pos then s else
timeOffsetFn (c.get' s.pos h) (c.next' s.pos h) c s
else
timeOffsetFn curr (input.next' s.pos h) c s
timeOffsetFn curr (c.next' s.pos h) c s
where
@[inline] timeOffsetFn curr nextPos c s :=
if curr == 'Z' || curr == 'z' then
@ -277,15 +267,14 @@ public def timeFn (allowOffset := false) : ParserFn :=
def optTimeFn : ParserFn := fun c s =>
let i := s.pos
let input := c.input
if h : input.atEnd i then
if h : c.atEnd i then
s
else
let curr := input.get' i h
let curr := c.get' i h
if curr = 'T' || curr = 't' then
timeFn true c (s.next' input i h)
timeFn true c (s.next' c i h)
else if curr = ' ' then
let tPos := input.next' i h
let tPos := c.next' i h
let s := timeFn true c (s.setPos tPos)
if s.hasError && s.pos == tPos then s.restore (s.stackSize-1) i else s
else
@ -299,29 +288,27 @@ public def dateTimeFn : ParserFn :=
repeatFn 4 (digitFn ["year digit"]) >> chFn '-' >> dateTimeAuxFn
def decExpFn : ParserFn := fun c s =>
let input := c.input
let expected := ["decimal exponent"]
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError expected
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr = '-' || curr == '+' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
sepByChar1Fn (·.isDigit) '_' expected c s
else if curr.isDigit then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
sepByChar1AuxFn (·.isDigit) '_' expected c s
else
mkUnexpectedCharError s curr expected
def optDecExpFn : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == 'e' || curr == 'E' then
decExpFn c (s.next' input s.pos h)
decExpFn c (s.next' c s.pos h)
else
s
@ -342,11 +329,10 @@ def decNumberTailAuxFn (startPos : String.Pos) (curr : Char) (nextPos : String.P
pushLit `Lake.Toml.decInt startPos skipFn c s
def decNumberTailFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
decNumberTailAuxFn startPos (input.get' s.pos h) (input.next' s.pos h) c s
decNumberTailAuxFn startPos (c.get' s.pos h) (c.next' s.pos h) c s
mutual
@ -358,27 +344,25 @@ partial def decNumberSepFn (startPos : String.Pos) (curr : Char) (nextPos : Stri
decNumberTailAuxFn startPos curr nextPos c s
partial def decNumberAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr.isDigit then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
decNumberAuxFn startPos c s
else
decNumberSepFn startPos curr (input.next' s.pos h) c s
decNumberSepFn startPos curr (c.next' s.pos h) c s
partial def decNumberFn (startPos : String.Pos) : ParserFn := fun c s =>
let i := s.pos
let input := c.input
let expected := ["decimal integer", "float"]
if h : input.atEnd i then
if h : c.atEnd i then
s.mkEOIError expected
else
let curr := input.get' i h
let curr := c.get' i h
if curr.isDigit then
let s := s.next' input i h
let s := s.next' c i h
decNumberAuxFn startPos c s
else
mkUnexpectedCharError s curr expected
@ -392,37 +376,35 @@ def nanAuxFn (startPos : String.Pos) : ParserFn :=
strFn "an" ["'nan'"] >> pushLit `Lake.Toml.float startPos
def decimalFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
let expected := ["decimal integer", "float"]
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError expected
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '0' then
decNumberTailFn startPos c (s.next' input s.pos h)
decNumberTailFn startPos c (s.next' c s.pos h)
else if curr.isDigit then
decNumberAuxFn startPos c (s.next' input s.pos h)
decNumberAuxFn startPos c (s.next' c s.pos h)
else if curr == 'i' then
infAuxFn startPos c (s.next' input s.pos h)
infAuxFn startPos c (s.next' c s.pos h)
else if curr == 'n' then
nanAuxFn startPos c (s.next' input s.pos h)
nanAuxFn startPos c (s.next' c s.pos h)
else
mkUnexpectedCharError s curr expected
def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
let input := c.input
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError ["decimal integer", "float", "date-time"]
else -- `NN`
let curr := input.get' s.pos h
let nextPos := input.next' s.pos h
let curr := c.get' s.pos h
let nextPos := c.next' s.pos h
if curr.isDigit then
let s := s.setPos nextPos
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
let curr := input.get' s.pos h
let nextPos := input.next' s.pos h
let curr := c.get' s.pos h
let nextPos := c.next' s.pos h
if curr == ':' then -- `HH:`
let s := s.setPos nextPos
let s := timeAuxFn false c s
@ -430,18 +412,18 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
pushLit `Lake.Toml.dateTime startPos skipFn c s
else if curr.isDigit then -- `NNN`
let s := s.setPos nextPos
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
let curr := input.get' s.pos h
let nextPos := input.next' s.pos h
let curr := c.get' s.pos h
let nextPos := c.next' s.pos h
if curr.isDigit then -- `NNNN`
let s := s.setPos nextPos
if h : input.atEnd nextPos then
if h : c.atEnd nextPos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
let curr := input.get' s.pos h
let nextPos := input.next' s.pos h
let curr := c.get' s.pos h
let nextPos := c.next' s.pos h
if curr == '-' then -- `YYYY-`
let s := s.setPos nextPos
let s := dateTimeAuxFn c s
@ -460,49 +442,48 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
decNumberSepFn startPos curr nextPos c s
public def numeralFn : ParserFn := atomicFn fun c s =>
let input := c.input
let startPos := s.pos
let expected := ["integer", "float", "date-time"]
if h : input.atEnd s.pos then
if h : c.atEnd s.pos then
s.mkEOIError expected
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == '0' then
let s := s.next' input startPos h
if h : input.atEnd s.pos then
let s := s.next' c startPos h
if h : c.atEnd s.pos then
pushLit `Lake.Toml.decInt startPos skipFn c s
else
let curr := input.get' s.pos h
let curr := c.get' s.pos h
if curr == 'b' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
let s := sepByChar1Fn isBinDigit '_' ["binary integer"] c s
if s.hasError then s else
pushLit `Lake.Toml.binNum startPos skipFn c s
else if curr == 'o' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
let s := sepByChar1Fn isOctDigit '_' ["octal integer"] c s
if s.hasError then s else
pushLit `Lake.Toml.octNum startPos skipFn c s
else if curr == 'x' then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
let s := sepByChar1Fn isHexDigit '_' ["hexadecimal integer"] c s
if s.hasError then s else
pushLit `Lake.Toml.hexNum startPos skipFn c s
else if curr.isDigit then
let s := s.next' input s.pos h
let s := s.next' c s.pos h
let s := (chFn ':' >> timeAuxFn false) c s
if s.hasError then s else
pushLit `Lake.Toml.dateTime startPos skipFn c s
else
decNumberTailAuxFn startPos curr (input.next' s.pos h) c s
decNumberTailAuxFn startPos curr (c.next' s.pos h) c s
else if curr.isDigit then
decNumeralAuxFn startPos c (s.next' input s.pos h)
decNumeralAuxFn startPos c (s.next' c s.pos h)
else if curr == '+' || curr == '-' then
decimalFn startPos c (s.next' input s.pos h)
decimalFn startPos c (s.next' c s.pos h)
else if curr == 'i' then
infAuxFn startPos c (s.next' input s.pos h)
infAuxFn startPos c (s.next' c s.pos h)
else if curr == 'n' then
nanAuxFn startPos c (s.next' input s.pos h)
nanAuxFn startPos c (s.next' c s.pos h)
else
s.mkUnexpectedError s!"unexpected '{curr}'" expected

View file

@ -24,10 +24,10 @@ public def loadToml (ictx : InputContext) : EIO MessageLog Table := do
| .ok env => pure env
| .error e => throw <| MessageLog.empty.add <| mkMessageNoPos ictx <|
m!"failed to initialize TOML environment: {e}"
let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.input)
let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.inputString)
if let some errorMsg := s.errorMsg then
throw <| MessageLog.empty.add <| mkParserErrorMessage ictx s errorMsg
else if ictx.input.atEnd s.pos then
else if ictx.atEnd s.pos then
let act := elabToml ⟨s.stxStack.back⟩
match (← act.run {fileName := ictx.fileName, fileMap := ictx.fileMap} {env} |>.toBaseIO) with
| .ok (t, s) =>

View file

@ -59,12 +59,12 @@ public def mkUnexpectedCharError (s : ParserState) (c : Char) (expected : List
@[inline] public def satisfyFn (p : Char → Bool) (expected : List String := []) : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then
if h : c.atEnd i then
s.mkEOIError expected
else
let curr := c.input.get' i h
let curr := c.get' i h
if p curr then
s.next' c.input i h
s.next' c i h
else
mkUnexpectedCharError s curr expected
@ -98,26 +98,24 @@ mutual
public partial def sepByChar1AuxFn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s =>
let i := s.pos
let input := c.input
if h : input.atEnd i then
if h : c.atEnd i then
s
else
let curr := input.get' i h
let curr := c.get' i h
if p curr then
sepByChar1AuxFn p sep expected c (s.next' input i h)
sepByChar1AuxFn p sep expected c (s.next' c i h)
else if curr == sep then
sepByChar1Fn p sep expected c (s.next' input i h)
sepByChar1Fn p sep expected c (s.next' c i h)
else
s
public partial def sepByChar1Fn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s =>
let i := s.pos
let input := c.input
if h : input.atEnd i then
if h : c.atEnd i then
s
else
let curr := input.get' i h
let s := s.next' input i h
let curr := c.get' i h
let s := s.next' c i h
if p curr then
sepByChar1AuxFn p sep expected c s
else if curr == sep then
@ -129,13 +127,12 @@ end
/-- Push a new atom onto the syntax stack. -/
public def pushAtom (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s =>
let input := c.input
let stopPos := s.pos
let leading := mkEmptySubstringAt input startPos
let val := input.extract startPos stopPos
let leading := c.mkEmptySubstringAt startPos
let val := c.extract startPos stopPos
let s := trailingFn c s
let stopPos' := s.pos
let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring }
let trailing := c.substring (startPos := stopPos) (stopPos := stopPos')
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
s.pushSyntax atom
@ -188,13 +185,12 @@ public def strAtom.parenthesizer (_ : String) (_ : List String) (_ : ParserFn)
/-- Push `(Syntax.node kind <new-atom>)` onto the syntax stack. -/
public def pushLit (kind : SyntaxNodeKind) (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s =>
let input := c.input
let stopPos := s.pos
let leading := mkEmptySubstringAt input startPos
let val := input.extract startPos stopPos
let leading := c.mkEmptySubstringAt startPos
let val := c.extract startPos stopPos
let s := trailingFn c s
let wsStopPos := s.pos
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
let info := SourceInfo.original leading startPos trailing stopPos
s.pushSyntax (Syntax.mkLit kind val info)

View file

@ -0,0 +1,59 @@
import Lean.Elab.Command
open Lean Elab Command Parser
declare_syntax_cat balanced
syntax "!" : balanced
syntax "(" balanced* ")" : balanced
elab d:docComment "test" : command => do
let some ⟨pos, endPos⟩ := d.raw[1].getRange? (canonicalOnly := true)
| throwErrorAt d "Docstring doesn't have a canonical position"
let p := Parser.categoryParser `balanced 0
let p := andthenFn whitespace p.fn
let text ← getFileMap
let input := text.source
let endPos := input.prev <| input.prev endPos
if h : endPos ≤ input.endPos then
let ictx := mkInputContext input (← getFileName) (endPos := endPos) (endPos_valid := h)
let env ← getEnv
let s := { mkParserState input with pos }
let s := p.run ictx { env, options := {} } (getTokenTable env) s
if !s.allErrors.isEmpty then
for (pos, _, err) in s.allErrors do
logMessage {
fileName := (← getFileName )
pos := text.toPosition pos
endPos := some (text.toPosition endPos)
data := s!"{err.toString}"
}
else if ictx.atEnd s.pos then
logInfo s.stxStack.back
else
let pos := s.pos
logMessage {
fileName := (← getFileName )
pos := text.toPosition pos
endPos := some (text.toPosition endPos)
data := s!"expected end of input"
}
else
throwError "Out of bounds"
/-- info: (!!(!)) -/
#guard_msgs in
/--
( ! ! (!) )
-/
test
/-- error: unexpected end of input; expected ')' -/
#guard_msgs in
/--
(
-/
test