feat: add a stop position field to the parser (#10043)
This PR allows Lean's parser to run with a final position prior to the end of the string, so it can be invoked on a sub-region of the input. This has applications in Verso proper, which parses Lean syntax in contexts such as code blocks and docstrings, and it is a prerequisite to parsing the contents of Lean docstrings.
This commit is contained in:
parent
0d9b7fb6b8
commit
c9727c2d19
13 changed files with 521 additions and 372 deletions
|
|
@ -166,14 +166,16 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0
|
|||
|
||||
/-- Find the longest _key_ in the trie that is contained in the given string `s` at position `i`,
|
||||
and return the associated value. -/
|
||||
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α :=
|
||||
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos)
|
||||
(endByte := s.utf8ByteSize)
|
||||
(endByte_valid : endByte ≤ s.utf8ByteSize := by simp) : Option α :=
|
||||
let rec loop
|
||||
| leaf v, _, res =>
|
||||
if v.isSome then v else res
|
||||
| node1 v c' t', i, res =>
|
||||
let res := if v.isSome then v else res
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte i h
|
||||
if h : i < endByte then
|
||||
let c := s.getUtf8Byte i (by omega)
|
||||
if c == c'
|
||||
then loop t' (i + 1) res
|
||||
else res
|
||||
|
|
@ -181,8 +183,8 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α
|
|||
res
|
||||
| node v cs ts, i, res =>
|
||||
let res := if v.isSome then v else res
|
||||
if h : i < s.utf8ByteSize then
|
||||
let c := s.getUtf8Byte i h
|
||||
if h : i < endByte then
|
||||
let c := s.getUtf8Byte i (by omega)
|
||||
match cs.findIdx? (· == c) with
|
||||
| none => res
|
||||
| some idx => loop ts[idx]! (i + 1) res
|
||||
|
|
|
|||
|
|
@ -260,7 +260,7 @@ ruled out.
|
|||
def LeanProcessingM.run (act : LeanProcessingM α) (oldInputCtx? : Option InputContext) :
|
||||
ProcessingM α := do
|
||||
-- compute position of syntactic change once
|
||||
let firstDiffPos? := oldInputCtx?.map (·.input.firstDiffPos (← read).input)
|
||||
let firstDiffPos? := oldInputCtx?.map (·.inputString.firstDiffPos (← read).inputString)
|
||||
ReaderT.adapt ({ · with firstDiffPos? }) act
|
||||
|
||||
/--
|
||||
|
|
@ -392,7 +392,7 @@ where
|
|||
-- they are passed separately from `old`
|
||||
if let some oldSuccess := old.result? then
|
||||
-- make sure to update ranges of all reused tasks
|
||||
let progressRange? := .some ⟨newParserState.pos, ctx.input.endPos⟩
|
||||
let progressRange? := .some ⟨newParserState.pos, ctx.endPos⟩
|
||||
return {
|
||||
ictx
|
||||
stx := newStx
|
||||
|
|
@ -478,7 +478,7 @@ where
|
|||
processHeader (stx : HeaderSyntax) (parserState : Parser.ModuleParserState) :
|
||||
LeanProcessingM (SnapshotTask HeaderProcessedSnapshot) := do
|
||||
let ctx ← read
|
||||
SnapshotTask.ofIO none none (.some ⟨0, ctx.input.endPos⟩) <|
|
||||
SnapshotTask.ofIO none none (.some ⟨0, ctx.endPos⟩) <|
|
||||
ReaderT.run (r := ctx) <| -- re-enter reader in new task
|
||||
withHeaderExceptions (α := HeaderProcessedSnapshot) ({ · with result? := none, metaSnap := default }) do
|
||||
let setup ← match (← setupImports stx) with
|
||||
|
|
@ -569,7 +569,7 @@ where
|
|||
parseCmd oldNext newParserState oldResult.cmdState newProm sync cancelTk ctx
|
||||
prom.resolve <| { old with nextCmdSnap? := some {
|
||||
stx? := none
|
||||
reportingRange := .some ⟨newParserState.pos, ctx.input.endPos⟩
|
||||
reportingRange := .some ⟨newParserState.pos, ctx.endPos⟩
|
||||
task := newProm.result!
|
||||
cancelTk? := cancelTk
|
||||
} }
|
||||
|
|
@ -651,7 +651,7 @@ where
|
|||
else some <$> IO.Promise.new
|
||||
let nextCmdSnap? := next?.map ({
|
||||
stx? := none
|
||||
reportingRange := .some ⟨parserState.pos, ctx.input.endPos⟩
|
||||
reportingRange := .some ⟨parserState.pos, ctx.endPos⟩
|
||||
cancelTk? := parseCancelTk
|
||||
task := ·.result!
|
||||
})
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ def errorAtSavedPosFn (msg : String) (delta : Bool) : ParserFn := fun c s =>
|
|||
match c.savedPos? with
|
||||
| none => s
|
||||
| some pos =>
|
||||
let pos := if delta then c.input.next pos else pos
|
||||
let pos := if delta then c.next pos else pos
|
||||
s.mkUnexpectedErrorAt msg pos
|
||||
|
||||
/-- Generate an error at the position saved with the `withPosition` combinator.
|
||||
|
|
@ -510,15 +510,15 @@ def many1Unbox (p : Parser) : Parser :=
|
|||
|
||||
partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
if h : c.input.atEnd i then s.mkEOIError
|
||||
else if p (c.input.get' i h) then s.next' c.input i h
|
||||
if h : c.atEnd i then s.mkEOIError
|
||||
else if p (c.get' i h) then s.next' c i h
|
||||
else s.mkUnexpectedError errorMsg
|
||||
|
||||
partial def takeUntilFn (p : Char → Bool) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
if h : c.input.atEnd i then s
|
||||
else if p (c.input.get' i h) then s
|
||||
else takeUntilFn p c (s.next' c.input i h)
|
||||
if h : c.atEnd i then s
|
||||
else if p (c.get' i h) then s
|
||||
else takeUntilFn p c (s.next' c i h)
|
||||
|
||||
def takeWhileFn (p : Char → Bool) : ParserFn :=
|
||||
takeUntilFn (fun c => !p c)
|
||||
|
|
@ -528,26 +528,25 @@ def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
|
|||
|
||||
variable (pushMissingOnError : Bool) in
|
||||
partial def finishCommentBlock (nesting : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then eoi s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then eoi s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let i := input.next' i h
|
||||
let curr := c.get' i h
|
||||
let i := c.next' i h
|
||||
if curr == '-' then
|
||||
if h : input.atEnd i then eoi s
|
||||
if h : c.atEnd i then eoi s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr == '/' then -- "-/" end of comment
|
||||
if nesting == 1 then s.next' input i h
|
||||
else finishCommentBlock (nesting-1) c (s.next' input i h)
|
||||
if nesting == 1 then s.next' c i h
|
||||
else finishCommentBlock (nesting-1) c (s.next' c i h)
|
||||
else
|
||||
finishCommentBlock nesting c (s.setPos i)
|
||||
else if curr == '/' then
|
||||
if h : input.atEnd i then eoi s
|
||||
if h : c.atEnd i then eoi s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
if curr == '-' then finishCommentBlock (nesting+1) c (s.next' input i h)
|
||||
let curr := c.get' i h
|
||||
if curr == '-' then finishCommentBlock (nesting+1) c (s.next' c i h)
|
||||
else finishCommentBlock nesting c (s.setPos i)
|
||||
else finishCommentBlock nesting c (s.setPos i)
|
||||
where
|
||||
|
|
@ -555,49 +554,46 @@ where
|
|||
|
||||
/-- Consume whitespace and comments -/
|
||||
partial def whitespace : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr == '\t' then
|
||||
s.mkUnexpectedError (pushMissing := false) "tabs are not allowed; please configure your editor to expand them"
|
||||
else if curr == '\r' then
|
||||
s.mkUnexpectedError (pushMissing := false) "isolated carriage returns are not allowed"
|
||||
else if curr.isWhitespace then whitespace c (s.next' input i h)
|
||||
else if curr.isWhitespace then whitespace c (s.next' c i h)
|
||||
else if curr == '-' then
|
||||
let i := input.next' i h
|
||||
let curr := input.get i
|
||||
if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next input i)
|
||||
let i := c.next' i h
|
||||
let curr := c.get i
|
||||
if curr == '-' then andthenFn (takeUntilFn (fun c => c = '\n')) whitespace c (s.next c i)
|
||||
else s
|
||||
else if curr == '/' then
|
||||
let i := input.next' i h
|
||||
let curr := input.get i
|
||||
let i := c.next' i h
|
||||
let curr := c.get i
|
||||
if curr == '-' then
|
||||
let i := input.next i
|
||||
let curr := input.get i
|
||||
let i := c.next i
|
||||
let curr := c.get i
|
||||
if curr == '-' || curr == '!' then s -- "/--" and "/-!" doc comment are actual tokens
|
||||
else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next input i)
|
||||
else andthenFn (finishCommentBlock (pushMissingOnError := false) 1) whitespace c (s.next c i)
|
||||
else s
|
||||
else s
|
||||
|
||||
def mkEmptySubstringAt (s : String) (p : String.Pos) : Substring := {
|
||||
str := s, startPos := p, stopPos := p
|
||||
}
|
||||
def ParserContext.mkEmptySubstringAt (c : ParserContext) (p : String.Pos) : Substring :=
|
||||
c.substring p p
|
||||
|
||||
private def rawAux (startPos : String.Pos) (trailingWs : Bool) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let stopPos := s.pos
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let val := input.extract startPos stopPos
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let val := c.extract startPos stopPos
|
||||
if trailingWs then
|
||||
let s := whitespace c s
|
||||
let stopPos' := s.pos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring }
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := stopPos')
|
||||
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
|
||||
s.pushSyntax atom
|
||||
else
|
||||
let trailing := mkEmptySubstringAt input stopPos
|
||||
let trailing := c.mkEmptySubstringAt stopPos
|
||||
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
|
||||
s.pushSyntax atom
|
||||
|
||||
|
|
@ -615,12 +611,11 @@ def rawCh (c : Char) (trailingWs := false) : Parser := {
|
|||
}
|
||||
|
||||
def hexDigitFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s.mkEOIError
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s.mkEOIError
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let i := input.next' i h
|
||||
let curr := c.get' i h
|
||||
let i := c.next' i h
|
||||
if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i
|
||||
else s.mkUnexpectedError "invalid hexadecimal numeral"
|
||||
|
||||
|
|
@ -630,17 +625,17 @@ Raises an error if the whitespace does not contain exactly one newline character
|
|||
-/
|
||||
partial def stringGapFn (seenNewline : Bool) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline
|
||||
if h : c.atEnd i then s -- let strLitFnAux handle the EOI error if !seenNewline
|
||||
else
|
||||
let curr := c.input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr == '\n' then
|
||||
if seenNewline then
|
||||
-- Having more than one newline in a string gap is visually confusing
|
||||
s.mkUnexpectedError "unexpected additional newline in string gap"
|
||||
else
|
||||
stringGapFn true c (s.next' c.input i h)
|
||||
stringGapFn true c (s.next' c i h)
|
||||
else if curr.isWhitespace then
|
||||
stringGapFn seenNewline c (s.next' c.input i h)
|
||||
stringGapFn seenNewline c (s.next' c i h)
|
||||
else if seenNewline then
|
||||
s
|
||||
else
|
||||
|
|
@ -653,17 +648,16 @@ Parses a string quotation after a `\`.
|
|||
in particular `"\" newline whitespace*` gaps.
|
||||
-/
|
||||
def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s.mkEOIError
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s.mkEOIError
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if isQuotable curr then
|
||||
s.next' input i h
|
||||
s.next' c i h
|
||||
else if curr == 'x' then
|
||||
andthenFn hexDigitFn hexDigitFn c (s.next' input i h)
|
||||
andthenFn hexDigitFn hexDigitFn c (s.next' c i h)
|
||||
else if curr == 'u' then
|
||||
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h)
|
||||
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' c i h)
|
||||
else if inString && curr == '\n' then
|
||||
stringGapFn false c s
|
||||
else
|
||||
|
|
@ -686,39 +680,36 @@ def quotedStringFn : ParserFn :=
|
|||
def mkNodeToken (n : SyntaxNodeKind) (startPos : String.Pos) : ParserFn := fun c s => Id.run do
|
||||
if s.hasError then
|
||||
return s
|
||||
let input := c.input
|
||||
let stopPos := s.pos
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let val := input.extract startPos stopPos
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let val := c.extract startPos stopPos
|
||||
let s := whitespace c s
|
||||
let wsStopPos := s.pos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
|
||||
let info := SourceInfo.original leading startPos trailing stopPos
|
||||
s.pushSyntax (Syntax.mkLit n val info)
|
||||
|
||||
def charLitFnAux (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s.mkEOIError
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s.mkEOIError
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.setPos (input.next' i h)
|
||||
let curr := c.get' i h
|
||||
let s := s.setPos (c.next' i h)
|
||||
let s := if curr == '\\' then quotedCharFn c s else s
|
||||
if s.hasError then s
|
||||
else
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let s := s.setPos (input.next i)
|
||||
let curr := c.get i
|
||||
let s := s.setPos (c.next i)
|
||||
if curr == '\'' then mkNodeToken charLitKind startPos c s
|
||||
else s.mkUnexpectedError "missing end of character literal"
|
||||
|
||||
partial def strLitFnAux (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s.mkUnexpectedErrorAt "unterminated string literal" startPos
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.setPos (input.next' i h)
|
||||
let curr := c.get' i h
|
||||
let s := s.setPos (c.next' i h)
|
||||
if curr == '\"' then
|
||||
mkNodeToken strLitKind startPos c s
|
||||
else if curr == '\\' then andthenFn quotedStringFn (strLitFnAux startPos) c s
|
||||
|
|
@ -730,12 +721,12 @@ If we are looking at a valid start to a raw string (`r##...#"`),
|
|||
returns true.
|
||||
We assume `i` begins at the position immediately after `r`.
|
||||
-/
|
||||
partial def isRawStrLitStart (input : String) (i : String.Pos) : Bool :=
|
||||
if h : input.atEnd i then false
|
||||
partial def isRawStrLitStart (c : ParserContext) (i : String.Pos) : Bool :=
|
||||
if h : c.atEnd i then false
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr == '#' then
|
||||
isRawStrLitStart input (input.next' i h)
|
||||
isRawStrLitStart c (c.next' i h)
|
||||
else
|
||||
curr == '"'
|
||||
|
||||
|
|
@ -755,12 +746,11 @@ where
|
|||
The `num` variable counts the number of `#`s after the `r`.
|
||||
-/
|
||||
initState (num : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then errorUnterminated s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then errorUnterminated s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.setPos (input.next' i h)
|
||||
let curr := c.get' i h
|
||||
let s := s.setPos (c.next' i h)
|
||||
if curr == '#' then
|
||||
initState (num + 1) c s
|
||||
else if curr == '"' then
|
||||
|
|
@ -773,12 +763,11 @@ where
|
|||
the raw string literal, we switch to `closingState`.
|
||||
-/
|
||||
normalState (num : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then errorUnterminated s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then errorUnterminated s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.setPos (input.next' i h)
|
||||
let curr := c.get' i h
|
||||
let s := s.setPos (c.next' i h)
|
||||
if curr == '\"' then
|
||||
if num == 0 then
|
||||
mkNodeToken strLitKind startPos c s
|
||||
|
|
@ -792,12 +781,11 @@ where
|
|||
Note: `num > 0` since the `num = 0` case is entirely handled by `normalState`.
|
||||
-/
|
||||
closingState (num : Nat) (closingNum : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then errorUnterminated s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then errorUnterminated s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.setPos (input.next' i h)
|
||||
let curr := c.get' i h
|
||||
let s := s.setPos (c.next' i h)
|
||||
if curr == '#' then
|
||||
if closingNum + 1 == num then
|
||||
mkNodeToken strLitKind startPos c s
|
||||
|
|
@ -815,30 +803,28 @@ Note: this does not report that it is expecting `_` if we reach EOI or an unexpe
|
|||
Rationale: this error happens if there is already a `_`, and while sequences of `_` are allowed, it's a bit perverse to suggest extending the sequence.
|
||||
-/
|
||||
partial def takeDigitsFn (isDigit : Char → Bool) (expecting : String) (needDigit : Bool) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then
|
||||
if needDigit then
|
||||
s.mkEOIError [expecting]
|
||||
else
|
||||
s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c.input i h)
|
||||
else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c.input i h)
|
||||
let curr := c.get' i h
|
||||
if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c i h)
|
||||
else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c i h)
|
||||
else if needDigit then s.mkUnexpectedError "unexpected character" (expected := [expecting])
|
||||
else s
|
||||
|
||||
def decimalNumberFn (startPos : String.Pos) (c : ParserContext) : ParserState → ParserState := fun s =>
|
||||
let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then
|
||||
let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then
|
||||
mkNodeToken numLitKind startPos c s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let j := input.next i
|
||||
if ∃ hj : ¬ input.atEnd j, curr = '.' && input.get' j hj = '.' then
|
||||
let curr := c.get' i h
|
||||
let j := c.next i
|
||||
if ∃ hj : ¬ c.atEnd j, curr = '.' && c.get' j hj = '.' then
|
||||
mkNodeToken numLitKind startPos c s
|
||||
else if curr == '.' || curr == 'e' || curr == 'E' then
|
||||
parseScientific s
|
||||
|
|
@ -851,12 +837,11 @@ where
|
|||
mkNodeToken scientificLitKind startPos c s
|
||||
|
||||
parseOptDot s :=
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
if curr == '.' then
|
||||
let i := input.next i
|
||||
let curr := input.get i
|
||||
let i := c.next i
|
||||
let curr := c.get i
|
||||
if curr.isDigit then
|
||||
takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i)
|
||||
else
|
||||
|
|
@ -865,13 +850,12 @@ where
|
|||
s
|
||||
|
||||
parseOptExp s :=
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
if curr == 'e' || curr == 'E' then
|
||||
let i := input.next i
|
||||
let i := if input.get i == '-' || input.get i == '+' then input.next i else i
|
||||
let curr := input.get i
|
||||
let i := c.next i
|
||||
let i := if c.get i == '-' || c.get i == '+' then c.next i else i
|
||||
let curr := c.get i
|
||||
if curr.isDigit then
|
||||
takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i)
|
||||
else
|
||||
|
|
@ -892,36 +876,35 @@ def hexNumberFn (startPos : String.Pos) : ParserFn := fun c s =>
|
|||
mkNodeToken numLitKind startPos c s
|
||||
|
||||
def numberFnAux : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let startPos := s.pos
|
||||
if h : input.atEnd startPos then s.mkEOIError
|
||||
if h : c.atEnd startPos then s.mkEOIError
|
||||
else
|
||||
let curr := input.get' startPos h
|
||||
let curr := c.get' startPos h
|
||||
if curr == '0' then
|
||||
let i := input.next' startPos h
|
||||
let curr := input.get i
|
||||
let i := c.next' startPos h
|
||||
let curr := c.get i
|
||||
if curr == 'b' || curr == 'B' then
|
||||
binNumberFn startPos c (s.next input i)
|
||||
binNumberFn startPos c (s.next c i)
|
||||
else if curr == 'o' || curr == 'O' then
|
||||
octalNumberFn startPos c (s.next input i)
|
||||
octalNumberFn startPos c (s.next c i)
|
||||
else if curr == 'x' || curr == 'X' then
|
||||
hexNumberFn startPos c (s.next input i)
|
||||
hexNumberFn startPos c (s.next c i)
|
||||
else
|
||||
decimalNumberFn startPos c (s.setPos i)
|
||||
else if curr.isDigit then
|
||||
decimalNumberFn startPos c (s.next input startPos)
|
||||
decimalNumberFn startPos c (s.next c startPos)
|
||||
else
|
||||
s.mkError "numeral"
|
||||
|
||||
def isIdCont : String → ParserState → Bool := fun input s =>
|
||||
def isIdCont : ParserContext → ParserState → Bool := fun c s =>
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
if curr == '.' then
|
||||
let i := input.next i
|
||||
if input.atEnd i then
|
||||
let i := c.next i
|
||||
if c.atEnd i then
|
||||
false
|
||||
else
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
isIdFirst curr || isIdBeginEscape curr
|
||||
else
|
||||
false
|
||||
|
|
@ -942,13 +925,12 @@ def mkTokenAndFixPos (startPos : String.Pos) (tk : Option Token) : ParserFn := f
|
|||
if c.forbiddenTk? == some tk then
|
||||
s.mkErrorAt "forbidden token" startPos
|
||||
else
|
||||
let input := c.input
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let stopPos := startPos + tk
|
||||
let s := s.setPos stopPos
|
||||
let s := whitespace c s
|
||||
let wsStopPos := s.pos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
|
||||
let atom := mkAtom (SourceInfo.original leading startPos trailing stopPos) tk
|
||||
s.pushSyntax atom
|
||||
|
||||
|
|
@ -957,45 +939,43 @@ def mkIdResult (startPos : String.Pos) (tk : Option Token) (val : Name) : Parser
|
|||
if isToken startPos stopPos tk then
|
||||
mkTokenAndFixPos startPos tk c s
|
||||
else
|
||||
let input := c.input
|
||||
let rawVal := { str := input, startPos := startPos, stopPos := stopPos : Substring }
|
||||
let rawVal := c.substring startPos stopPos
|
||||
let s := whitespace c s
|
||||
let trailingStopPos := s.pos
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := trailingStopPos : Substring }
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := trailingStopPos)
|
||||
let info := SourceInfo.original leading startPos trailing stopPos
|
||||
let atom := mkIdent info rawVal val
|
||||
s.pushSyntax atom
|
||||
|
||||
partial def identFnAux (startPos : String.Pos) (tk : Option Token) (r : Name) : ParserFn :=
|
||||
let rec parse (r : Name) (c s) :=
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then
|
||||
s.mkEOIError
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if isIdBeginEscape curr then
|
||||
let startPart := input.next' i h
|
||||
let startPart := c.next' i h
|
||||
let s := takeUntilFn isIdEndEscape c (s.setPos startPart)
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkUnexpectedErrorAt "unterminated identifier escape" startPart
|
||||
else
|
||||
let stopPart := s.pos
|
||||
let s := s.next' c.input s.pos h
|
||||
let r := .str r (input.extract startPart stopPart)
|
||||
if isIdCont input s then
|
||||
let s := s.next input s.pos
|
||||
let s := s.next' c s.pos h
|
||||
let r := .str r (c.extract startPart stopPart)
|
||||
if isIdCont c s then
|
||||
let s := s.next c s.pos
|
||||
parse r c s
|
||||
else
|
||||
mkIdResult startPos tk r c s
|
||||
else if isIdFirst curr then
|
||||
let startPart := i
|
||||
let s := takeWhileFn isIdRest c (s.next input i)
|
||||
let s := takeWhileFn isIdRest c (s.next c i)
|
||||
let stopPart := s.pos
|
||||
let r := .str r (input.extract startPart stopPart)
|
||||
if isIdCont input s then
|
||||
let s := s.next input s.pos
|
||||
let r := .str r (c.extract startPart stopPart)
|
||||
if isIdCont c s then
|
||||
let s := s.next c s.pos
|
||||
parse r c s
|
||||
else
|
||||
mkIdResult startPos tk r c s
|
||||
|
|
@ -1007,8 +987,7 @@ private def isIdFirstOrBeginEscape (c : Char) : Bool :=
|
|||
isIdFirst c || isIdBeginEscape c
|
||||
|
||||
private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let s := identFnAux startPos none .anonymous c (s.next input startPos)
|
||||
let s := identFnAux startPos none .anonymous c (s.next c startPos)
|
||||
if s.hasError then
|
||||
s
|
||||
else
|
||||
|
|
@ -1020,21 +999,23 @@ private def nameLitAux (startPos : String.Pos) : ParserFn := fun c s =>
|
|||
| _ => s.mkError "invalid Name literal"
|
||||
|
||||
private def tokenFnAux : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
if curr == '\"' then
|
||||
strLitFnAux i c (s.next input i)
|
||||
else if curr == '\'' && getNext input i != '\'' then
|
||||
charLitFnAux i c (s.next input i)
|
||||
strLitFnAux i c (s.next c i)
|
||||
else if curr == '\'' && c.getNext i != '\'' then
|
||||
charLitFnAux i c (s.next c i)
|
||||
else if curr.isDigit then
|
||||
numberFnAux c s
|
||||
else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then
|
||||
else if curr == '`' && isIdFirstOrBeginEscape (c.getNext i) then
|
||||
nameLitAux i c s
|
||||
else if curr == 'r' && isRawStrLitStart input (input.next i) then
|
||||
rawStrLitFnAux i c (s.next input i)
|
||||
else if curr == 'r' && isRawStrLitStart c (c.next i) then
|
||||
rawStrLitFnAux i c (s.next c i)
|
||||
else
|
||||
let tk := c.tokens.matchPrefix input i
|
||||
let tk := c.tokens.matchPrefix c.inputString i c.endPos.byteIdx <| by
|
||||
let ⟨⟨{inputString, endPos := ⟨endPos⟩, endPos_valid, ..}, _, _, _⟩⟩ := c
|
||||
rw [String.endPos] at endPos_valid
|
||||
omega
|
||||
identFnAux i tk .anonymous c s
|
||||
|
||||
private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState :=
|
||||
|
|
@ -1048,9 +1029,8 @@ private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserS
|
|||
| other => other
|
||||
|
||||
def tokenFn (expected : List String := []) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if input.atEnd i then s.mkEOIError expected
|
||||
let i := s.pos
|
||||
if c.atEnd i then s.mkEOIError expected
|
||||
else
|
||||
let tkc := s.cache.tokenCache
|
||||
if tkc.startPos == i then
|
||||
|
|
@ -1078,9 +1058,8 @@ def peekToken (c : ParserContext) (s : ParserState) : ParserState × Except Pars
|
|||
|
||||
/-- Treat keywords as identifiers. -/
|
||||
def rawIdentFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if input.atEnd i then s.mkEOIError
|
||||
let i := s.pos
|
||||
if c.atEnd i then s.mkEOIError
|
||||
else identFnAux i none .anonymous c s
|
||||
|
||||
def satisfySymbolFn (p : String → Bool) (expected : List String) : ParserFn := fun c s => Id.run do
|
||||
|
|
@ -1158,10 +1137,9 @@ partial def strAux (sym : String) (errorMsg : String) (j : String.Pos) :ParserFn
|
|||
if h₁ : sym.atEnd j then s
|
||||
else
|
||||
let i := s.pos
|
||||
let input := c.input
|
||||
if h₂ : input.atEnd i then s.mkError errorMsg
|
||||
else if sym.get' j h₁ != input.get' i h₂ then s.mkError errorMsg
|
||||
else parse (sym.next' j h₁) c (s.next' input i h₂)
|
||||
if h₂ : c.atEnd i then s.mkError errorMsg
|
||||
else if sym.get' j h₁ != c.get' i h₂ then s.mkError errorMsg
|
||||
else parse (sym.next' j h₁) c (s.next' c i h₂)
|
||||
parse j
|
||||
|
||||
def checkTailWs (prev : Syntax) : Bool :=
|
||||
|
|
@ -1306,7 +1284,6 @@ def identEq (id : Name) : Parser := {
|
|||
}
|
||||
|
||||
def hygieneInfoFn : ParserFn := fun c s => Id.run do
|
||||
let input := c.input
|
||||
let finish pos str trailing s :=
|
||||
-- Builds an actual hygieneInfo node from empty string `str` and trailing whitespace `trailing`.
|
||||
let info := SourceInfo.original str pos trailing pos
|
||||
|
|
@ -1321,13 +1298,13 @@ def hygieneInfoFn : ParserFn := fun c s => Id.run do
|
|||
if !s.stxStack.isEmpty then
|
||||
let prev := s.stxStack.back
|
||||
if let .original leading pos trailing endPos := prev.getTailInfo then
|
||||
let str := mkEmptySubstringAt input endPos
|
||||
let str := c.mkEmptySubstringAt endPos
|
||||
-- steal the trailing whitespace from the last node and use it for this node
|
||||
let s := s.popSyntax.pushSyntax <| prev.setTailInfo (.original leading pos str endPos)
|
||||
return finish endPos str trailing s
|
||||
-- The stack can be empty if this is either the first token, or if we are in a fresh cache.
|
||||
-- In that case we just put the hygieneInfo at the current location.
|
||||
let str := mkEmptySubstringAt input s.pos
|
||||
let str := c.mkEmptySubstringAt s.pos
|
||||
finish s.pos str str s
|
||||
|
||||
def hygieneInfoNoAntiquot : Parser := {
|
||||
|
|
@ -1577,7 +1554,7 @@ This parser has the same arity as `p` - it just forwards the results of `p`. -/
|
|||
|
||||
def eoiFn : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
if c.input.atEnd i then s
|
||||
if c.atEnd i then s
|
||||
else s.mkError "expected end of file"
|
||||
|
||||
def eoi : Parser := {
|
||||
|
|
@ -1736,11 +1713,10 @@ def termParser (prec : Nat := 0) : Parser :=
|
|||
fn := fun c s =>
|
||||
let prev := s.stxStack.back
|
||||
if checkTailNoWs prev then
|
||||
let input := c.input
|
||||
let i := s.pos
|
||||
if h : input.atEnd i then s
|
||||
let i := s.pos
|
||||
if h : c.atEnd i then s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr == ':' then
|
||||
s.mkUnexpectedError "unexpected ':'"
|
||||
else s
|
||||
|
|
@ -1775,7 +1751,7 @@ def tokenAntiquotFn : ParserFn := fun c s => Id.run do
|
|||
def tokenWithAntiquot : Parser → Parser := withFn fun f c s =>
|
||||
let s := f c s
|
||||
-- fast check that is false in most cases
|
||||
if c.input.get s.pos == '%' then
|
||||
if c.get s.pos == '%' then
|
||||
tokenAntiquotFn c s
|
||||
else
|
||||
s
|
||||
|
|
@ -1811,7 +1787,7 @@ def unicodeSymbol (sym asciiSym : String) : Parser :=
|
|||
|
||||
def withAntiquotFn (antiquotP p : ParserFn) (isCatAntiquot := false) : ParserFn := fun c s =>
|
||||
-- fast check that is false in most cases
|
||||
if c.input.get s.pos == '$' then
|
||||
if c.get s.pos == '$' then
|
||||
-- Do not allow antiquotation choice nodes here as `antiquotP` is the strictly more general
|
||||
-- antiquotation than any in `p`.
|
||||
-- If it is a category antiquotation, do not backtrack into the category at all as that would
|
||||
|
|
@ -1954,7 +1930,7 @@ def prattParser (kind : Name) (tables : PrattParsingTables) (behavior : LeadingI
|
|||
def fieldIdxFn : ParserFn := fun c s =>
|
||||
let initStackSz := s.stackSize
|
||||
let iniPos := s.pos
|
||||
let curr := c.input.get iniPos
|
||||
let curr := c.get iniPos
|
||||
if curr.isDigit && curr != '0' then
|
||||
let s := takeWhileFn (fun c => c.isDigit) c s
|
||||
mkNodeToken fieldIdxKind iniPos c s
|
||||
|
|
|
|||
|
|
@ -443,12 +443,27 @@ def getSyntaxNodeKinds (env : Environment) : List SyntaxNodeKind :=
|
|||
def getTokenTable (env : Environment) : TokenTable :=
|
||||
(parserExtension.getState env).tokens
|
||||
|
||||
set_option linter.unusedVariables.funArgs false in
|
||||
-- Note: `crlfToLf` preserves logical line and column numbers for each character.
|
||||
def mkInputContext (input : String) (fileName : String) (normalizeLineEndings := true) : InputContext :=
|
||||
let input' := if normalizeLineEndings then input.crlfToLf else input
|
||||
{ input := input',
|
||||
fileName := fileName,
|
||||
fileMap := input'.toFileMap }
|
||||
def mkInputContext (input : String) (fileName : String)
|
||||
(normalizeLineEndings := true)
|
||||
(endPos := input.endPos)
|
||||
(endPos_valid : endPos ≤ input.endPos := by simp) :
|
||||
InputContext :=
|
||||
let text := FileMap.ofString input
|
||||
let next := if normalizeLineEndings then
|
||||
-- Convert the stop position to a line/column position so crlf translation doesn't invalidate it
|
||||
let endPos' := text.toPosition endPos
|
||||
let text := FileMap.ofString text.source.crlfToLf
|
||||
(text, text.ofPosition endPos')
|
||||
else
|
||||
(text, endPos)
|
||||
let text := next.1
|
||||
let endPos' := next.2
|
||||
if h : endPos' ≤ text.source.endPos then
|
||||
.mk text.source fileName (fileMap := text) (endPos := endPos') (endPos_valid := h)
|
||||
else
|
||||
.mk text.source fileName (fileMap := text)
|
||||
|
||||
def mkParserState (input : String) : ParserState :=
|
||||
{ cache := initCacheForInput input }
|
||||
|
|
@ -460,7 +475,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil
|
|||
let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input)
|
||||
if !s.allErrors.isEmpty then
|
||||
Except.error (s.toErrorMsg ictx)
|
||||
else if ictx.input.atEnd s.pos then
|
||||
else if ictx.atEnd s.pos then
|
||||
Except.ok s.stxStack.back
|
||||
else
|
||||
Except.error ((s.mkError "end of input").toErrorMsg ictx)
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × Modul
|
|||
let dummyEnv ← mkEmptyEnvironment
|
||||
let p := andthenFn whitespace Module.header.fn
|
||||
let tokens := Module.updateTokens (getTokenTable dummyEnv)
|
||||
let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input)
|
||||
let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.inputString)
|
||||
let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back
|
||||
let mut messages : MessageLog := {}
|
||||
for (pos, stk, err) in s.allErrors do
|
||||
|
|
@ -125,7 +125,7 @@ def isTerminalCommand (s : Syntax) : Bool :=
|
|||
s.isOfKind ``Command.exit || s.isOfKind ``Command.import || s.isOfKind ``Command.eoi
|
||||
|
||||
private def consumeInput (inputCtx : InputContext) (pmctx : ParserModuleContext) (pos : String.Pos) : String.Pos :=
|
||||
let s : ParserState := { cache := initCacheForInput inputCtx.input, pos := pos }
|
||||
let s : ParserState := { cache := initCacheForInput inputCtx.inputString, pos := pos }
|
||||
let s := tokenFn [] |>.run inputCtx pmctx (getTokenTable pmctx.env) s
|
||||
match s.errorMsg with
|
||||
| some _ => pos + ' '
|
||||
|
|
@ -140,12 +140,12 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
|
|||
let mut messages := messages
|
||||
let mut stx := Syntax.missing -- will always be assigned below
|
||||
repeat
|
||||
if inputCtx.input.atEnd pos then
|
||||
if inputCtx.atEnd pos then
|
||||
stx := mkEOI pos
|
||||
break
|
||||
let pos' := pos
|
||||
let p := andthenFn whitespace topLevelCommandParserFn
|
||||
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos }
|
||||
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.inputString, pos }
|
||||
-- save errors from sub-recoveries
|
||||
for (rpos, rstk, recovered) in s.recoveredErrors do
|
||||
messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered
|
||||
|
|
|
|||
|
|
@ -15,16 +15,15 @@ def isQuotableCharForStrInterpolant (c : Char) : Bool :=
|
|||
c == '{' || isQuotableCharDefault c
|
||||
|
||||
partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let stackSize := s.stackSize
|
||||
let rec parse (startPos : String.Pos) (c : ParserContext) (s : ParserState) : ParserState :=
|
||||
let i := s.pos
|
||||
if input.atEnd i then
|
||||
let i := s.pos
|
||||
if c.atEnd i then
|
||||
let s := s.mkError "unterminated string literal"
|
||||
s.mkNode interpolatedStrKind stackSize
|
||||
else
|
||||
let curr := input.get i
|
||||
let s := s.setPos (input.next i)
|
||||
let curr := c.get i
|
||||
let s := s.setPos (c.next i)
|
||||
if curr == '\"' then
|
||||
let s := mkNodeToken interpolatedStrLitKind startPos c s
|
||||
s.mkNode interpolatedStrKind stackSize
|
||||
|
|
@ -36,9 +35,9 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
|
|||
if s.hasError then s
|
||||
else
|
||||
let i := s.pos
|
||||
let curr := input.get i
|
||||
let curr := c.get i
|
||||
if curr == '}' then
|
||||
let s := s.setPos (input.next i)
|
||||
let s := s.setPos (c.next i)
|
||||
parse i c s
|
||||
else
|
||||
let s := s.mkError "'}'"
|
||||
|
|
@ -46,14 +45,14 @@ partial def interpolatedStrFn (p : ParserFn) : ParserFn := fun c s =>
|
|||
else
|
||||
parse startPos c s
|
||||
let startPos := s.pos
|
||||
if input.atEnd startPos then
|
||||
if c.atEnd startPos then
|
||||
s.mkEOIError
|
||||
else
|
||||
let curr := input.get s.pos;
|
||||
let curr := c.get s.pos;
|
||||
if curr != '\"' then
|
||||
s.mkError "interpolated string"
|
||||
else
|
||||
let s := s.next input startPos
|
||||
let s := s.next c startPos
|
||||
parse startPos c s
|
||||
|
||||
@[inline] def interpolatedStrNoAntiquot (p : Parser) : Parser := {
|
||||
|
|
|
|||
|
|
@ -48,15 +48,127 @@ def SyntaxNodeKindSet.insert (s : SyntaxNodeKindSet) (k : SyntaxNodeKind) : Synt
|
|||
Input string and related data. Recall that the `FileMap` is a helper structure for mapping
|
||||
`String.Pos` in the input string to line/column information. -/
|
||||
structure InputContext where
|
||||
input : String
|
||||
mk' ::
|
||||
inputString : String
|
||||
fileName : String
|
||||
fileMap : FileMap
|
||||
endPos : String.Pos := input.endPos
|
||||
endPos_valid : endPos ≤ input.endPos := by simp
|
||||
fileMap : FileMap
|
||||
endPos : String.Pos := inputString.endPos
|
||||
endPos_valid : endPos ≤ inputString.endPos := by simp
|
||||
|
||||
instance : Inhabited InputContext where
|
||||
default := ⟨"", default, default, "".endPos, String.Pos.mk_le_mk.mpr (Nat.le_refl _)⟩
|
||||
|
||||
|
||||
namespace InputContext
|
||||
def mk
|
||||
(input fileName : String)
|
||||
(endPos : String.Pos := input.endPos)
|
||||
(endPos_valid : endPos ≤ input.endPos := by simp)
|
||||
(fileMap : FileMap := FileMap.ofString input) :
|
||||
InputContext where
|
||||
inputString := input
|
||||
endPos := endPos
|
||||
endPos_valid := endPos_valid
|
||||
fileName := fileName
|
||||
fileMap := fileMap
|
||||
|
||||
@[deprecated "Call `InputContext.get`, `InputContext.get'`, `InputContext.next'`, and `InputContext.atEnd` directly" (since := "2025-08-21")]
|
||||
def input (c : InputContext) : String := c.inputString.extract 0 c.endPos
|
||||
|
||||
/--
|
||||
Returns `true` if a specified byte position is greater than or equal to the position which points to
|
||||
the end of the input string. Otherwise, returns `false`.
|
||||
-/
|
||||
def atEnd (c : InputContext) (p : String.Pos) : Bool := p ≥ c.endPos
|
||||
|
||||
/--
|
||||
Returns the character at position `p` of the input string. If `p` is not a valid position, returns
|
||||
the fallback value `(default : Char)`, which is `'A'`, but does not panic.
|
||||
-/
|
||||
@[inline]
|
||||
def get (c : InputContext) (p : String.Pos) : Char := c.inputString.get p
|
||||
|
||||
theorem not_atEnd_inputString {c : InputContext} {p : String.Pos} :
|
||||
¬c.atEnd p → ¬c.inputString.atEnd p := by
|
||||
intro h
|
||||
let {inputString, endPos := ⟨e⟩, endPos_valid, ..} := c
|
||||
cases p
|
||||
simp only [String.endPos, String.Pos.mk_le_mk] at endPos_valid
|
||||
simp_all [String.atEnd, InputContext.atEnd, String.Pos.mk_le_mk]
|
||||
exact Nat.lt_of_lt_of_le h endPos_valid
|
||||
|
||||
/--
|
||||
Returns the character at position `p` of the input string. Returns `(default : Char)`, which is
|
||||
`'A'`, if `p` is not a valid position.
|
||||
|
||||
Requires evidence, `h`, that `p` is within bounds instead of performing a run-time bounds check as
|
||||
in `InputContext.get`.
|
||||
|
||||
A typical pattern combines `get'` with a dependent `if`-expression to avoid the overhead of an
|
||||
additional bounds check. For example:
|
||||
```
|
||||
def getInBounds? (s : String) (p : String.Pos) : Option Char :=
|
||||
if h : s.atEnd p then none else some (s.get' p h)
|
||||
```
|
||||
Even with evidence of `¬ s.atEnd p`, `p` may be invalid if a byte index points into the middle of a
|
||||
multi-byte UTF-8 character.
|
||||
-/
|
||||
@[inline]
|
||||
def get' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : Char :=
|
||||
c.inputString.get' p (c.not_atEnd_inputString h)
|
||||
|
||||
/--
|
||||
Returns the next position in the input string after position `p`. If `p` is not a valid position or
|
||||
`p = c.endPos`, returns the position one byte after `p`.
|
||||
|
||||
A run-time bounds check is performed to determine whether `p` is at the end of the string. If a
|
||||
bounds check has already been performed, use `InputContext.next'` to avoid a repeated check.
|
||||
-/
|
||||
@[inline]
|
||||
def next (c : InputContext) (p : String.Pos) : String.Pos :=
|
||||
c.inputString.next p
|
||||
|
||||
/--
|
||||
Returns the next position in the input string after position `p`. The result is unspecified if `p`
|
||||
is not a valid position.
|
||||
|
||||
Requires evidence, `h`, that `p` is within bounds. No run-time bounds check is performed.
|
||||
-/
|
||||
@[inline]
|
||||
def next' (c : InputContext) (p : String.Pos) (h : ¬c.atEnd p) : String.Pos :=
|
||||
c.inputString.next' p (c.not_atEnd_inputString h)
|
||||
|
||||
/--
|
||||
Creates a new string that consists of the region of the input string delimited by the two positions.
|
||||
|
||||
The result is `""` if the start position is greater than or equal to the end position or if the
|
||||
start position is at the end of the string. If either position is invalid (that is, if either points
|
||||
at the middle of a multi-byte UTF-8 character) then the result is unspecified.
|
||||
-/
|
||||
@[inline]
|
||||
def extract (c : InputContext) : String.Pos → String.Pos → String :=
|
||||
c.inputString.extract
|
||||
|
||||
/--
|
||||
Extracts a substring of the input string, bounded by `startPos` and `stopPos`.
|
||||
-/
|
||||
@[inline]
|
||||
def substring (c : InputContext) (startPos stopPos : String.Pos) : Substring :=
|
||||
{ str := c.inputString, startPos, stopPos := min stopPos c.endPos }
|
||||
|
||||
/-- Return character after position `pos` -/
|
||||
@[inline]
|
||||
def getNext (input : InputContext) (pos : String.Pos) : Char :=
|
||||
input.get (input.next pos)
|
||||
|
||||
/-- Returns the character position prior to `pos` -/
|
||||
@[inline]
|
||||
def prev (c : InputContext) (pos : String.Pos) : String.Pos :=
|
||||
c.inputString.prev pos
|
||||
|
||||
end InputContext
|
||||
|
||||
|
||||
/-- Input context derived from elaboration of previous commands. -/
|
||||
structure ParserModuleContext where
|
||||
env : Environment
|
||||
|
|
@ -82,6 +194,16 @@ structure ParserContextCore extends InputContext, ParserModuleContext, Cacheable
|
|||
/-- Opaque parser context updateable using `adaptCacheableContextFn` and `adaptUncacheableContextFn`. -/
|
||||
structure ParserContext extends ParserContextCore where private mk ::
|
||||
|
||||
instance : Coe ParserContext InputContext := ⟨(·.toInputContext)⟩
|
||||
|
||||
/--
|
||||
Modifies the ending position of a parser context.
|
||||
-/
|
||||
def ParserContext.setEndPos (c : ParserContext)
|
||||
(endPos : String.Pos) (endPos_valid : endPos ≤ c.inputString.endPos) :
|
||||
ParserContext :=
|
||||
{ c with endPos, endPos_valid }
|
||||
|
||||
structure Error where
|
||||
/--
|
||||
If not `missing`, used for lazily calculating `unexpected` message and range in `mkErrorMessage`.
|
||||
|
|
@ -237,11 +359,11 @@ def popSyntax (s : ParserState) : ParserState :=
|
|||
def shrinkStack (s : ParserState) (iniStackSz : Nat) : ParserState :=
|
||||
{ s with stxStack := s.stxStack.shrink iniStackSz }
|
||||
|
||||
def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState :=
|
||||
{ s with pos := input.next pos }
|
||||
def next (s : ParserState) (c : ParserContext) (pos : String.Pos) : ParserState :=
|
||||
{ s with pos := c.next pos }
|
||||
|
||||
def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState :=
|
||||
{ s with pos := input.next' pos h }
|
||||
def next' (s : ParserState) (c : ParserContext) (pos : String.Pos) (h : ¬ c.atEnd pos) : ParserState :=
|
||||
{ s with pos := c.next' pos h }
|
||||
|
||||
def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
|
||||
match s with
|
||||
|
|
|
|||
|
|
@ -352,14 +352,12 @@ def trailingNode.formatter (k : SyntaxNodeKind) (_ _ : Nat) (p : Formatter) : Fo
|
|||
|
||||
def parseToken (s : String) : FormatterM ParserState :=
|
||||
-- include comment tokens, e.g. when formatting `- -0`
|
||||
return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run {
|
||||
input := s,
|
||||
fileName := "",
|
||||
fileMap := FileMap.ofString ""
|
||||
} {
|
||||
env := ← getEnv,
|
||||
options := ← getOptions
|
||||
} ((← read).table) (Parser.mkParserState s)
|
||||
let ictx := .mk s "" (fileMap := FileMap.ofString "")
|
||||
return (Parser.andthenFn Parser.whitespace (Parser.tokenFn [])).run ictx
|
||||
{
|
||||
env := ← getEnv,
|
||||
options := ← getOptions
|
||||
} ((← read).table) (Parser.mkParserState s)
|
||||
|
||||
def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit := do
|
||||
if let SourceInfo.original _ _ ss _ := info then
|
||||
|
|
|
|||
|
|
@ -97,10 +97,11 @@ structure DocumentMeta where
|
|||
deriving Inhabited
|
||||
|
||||
/-- Extracts an `InputContext` from `doc`. -/
|
||||
def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext where
|
||||
input := doc.text.source
|
||||
fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString
|
||||
fileMap := doc.text
|
||||
def DocumentMeta.mkInputContext (doc : DocumentMeta) : Parser.InputContext :=
|
||||
.mk
|
||||
(input := doc.text.source)
|
||||
(fileName := (System.Uri.fileUriToPath? doc.uri).getD doc.uri |>.toString)
|
||||
(fileMap := doc.text)
|
||||
|
||||
/--
|
||||
Replaces the range `r` (using LSP UTF-16 positions) in `text` (using UTF-8 positions)
|
||||
|
|
|
|||
|
|
@ -39,28 +39,26 @@ public def wsFn : ParserFn :=
|
|||
|
||||
/-- Consumes the LF following a CR in a CRLF newline. -/
|
||||
def crlfAuxFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let errMsg := "invalid newline; no LF after CR"
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkUnexpectedError errMsg
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\n' then
|
||||
s.next' input s.pos h
|
||||
s.next' c s.pos h
|
||||
else
|
||||
s.mkUnexpectedError errMsg
|
||||
|
||||
/-- Consume a newline. -/
|
||||
public def newlineFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError ["newline"]
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\n' then
|
||||
s.next' input s.pos h
|
||||
s.next' c s.pos h
|
||||
else if curr == '\r' then
|
||||
crlfAuxFn c (s.next' input s.pos h)
|
||||
crlfAuxFn c (s.next' c s.pos h)
|
||||
else
|
||||
mkUnexpectedCharError s curr ["newline"]
|
||||
|
||||
|
|
@ -76,31 +74,29 @@ public def commentFn : ParserFn :=
|
|||
|
||||
/-- Consume optional whitespace (space, tab, or newline). -/
|
||||
public partial def wsNewlineFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr = ' ' || curr = '\t' || curr == '\n' then
|
||||
wsNewlineFn c (s.next' input s.pos h)
|
||||
wsNewlineFn c (s.next' c s.pos h)
|
||||
else if curr == '\r' then
|
||||
(crlfAuxFn >> wsNewlineFn) c (s.next' input s.pos h)
|
||||
(crlfAuxFn >> wsNewlineFn) c (s.next' c s.pos h)
|
||||
else
|
||||
s
|
||||
|
||||
/-- Consume optional sequence of whitespace / newline(s) / comment (s). -/
|
||||
public partial def trailingFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr = ' ' || curr = '\t' || curr == '\n' then
|
||||
trailingFn c (s.next' input s.pos h)
|
||||
trailingFn c (s.next' c s.pos h)
|
||||
else if curr == '\r' then
|
||||
(crlfAuxFn >> trailingFn) c (s.next' input s.pos h)
|
||||
(crlfAuxFn >> trailingFn) c (s.next' c s.pos h)
|
||||
else if curr == '#' then
|
||||
(commentBodyFn >> trailingFn) c (s.next' input s.pos h)
|
||||
(commentBodyFn >> trailingFn) c (s.next' c s.pos h)
|
||||
else
|
||||
s
|
||||
|
||||
|
|
@ -114,23 +110,22 @@ public def isEscapeChar (c : Char) : Bool :=
|
|||
|
||||
/-- Consumes a TOML string escape sequence after a `\`. -/
|
||||
def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let expected := ["escape sequence"]
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
let ifStringGap (p : ParserFn) :=
|
||||
if stringGap then
|
||||
p c (s.next' input s.pos h)
|
||||
p c (s.next' c s.pos h)
|
||||
else
|
||||
s.mkUnexpectedError "string gap is forbidden here" expected
|
||||
if isEscapeChar curr then
|
||||
s.next' input s.pos h
|
||||
s.next' c s.pos h
|
||||
else if curr == 'u' then
|
||||
repeatFn 4 hexDigitFn c (s.next' input s.pos h)
|
||||
repeatFn 4 hexDigitFn c (s.next' c s.pos h)
|
||||
else if curr == 'U' then
|
||||
repeatFn 8 hexDigitFn c (s.next' input s.pos h)
|
||||
repeatFn 8 hexDigitFn c (s.next' c s.pos h)
|
||||
else if curr == ' ' || curr == '\t' then
|
||||
ifStringGap (wsFn >> newlineFn >> wsNewlineFn)
|
||||
else if curr == '\n' then
|
||||
|
|
@ -141,50 +136,47 @@ def escapeSeqFn (stringGap : Bool) : ParserFn := fun c s =>
|
|||
s.mkUnexpectedError "invalid escape sequence"
|
||||
|
||||
partial def basicStringAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkUnexpectedErrorAt "unterminated basic string" startPos
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\"' then
|
||||
s.next' input s.pos h
|
||||
s.next' c s.pos h
|
||||
else if curr == '\\' then
|
||||
(escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' input s.pos h)
|
||||
(escapeSeqFn false >> basicStringAuxFn startPos) c (s.next' c s.pos h)
|
||||
else if isControlChar curr then
|
||||
mkUnexpectedCharError s curr
|
||||
else
|
||||
basicStringAuxFn startPos c (s.next' input s.pos h)
|
||||
basicStringAuxFn startPos c (s.next' c s.pos h)
|
||||
|
||||
public def basicStringFn : ParserFn := usePosFn fun startPos =>
|
||||
chFn '\"' ["basic string"] >> basicStringAuxFn startPos
|
||||
|
||||
partial def literalStringAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkUnexpectedErrorAt "unterminated literal string" startPos
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\'' then
|
||||
s.next' input s.pos h
|
||||
s.next' c s.pos h
|
||||
else if isControlChar curr then
|
||||
mkUnexpectedCharError s curr
|
||||
else
|
||||
literalStringAuxFn startPos c (s.next' input s.pos h)
|
||||
literalStringAuxFn startPos c (s.next' c s.pos h)
|
||||
|
||||
public def literalStringFn : ParserFn := usePosFn fun startPos =>
|
||||
chFn '\'' ["literal string"] >> literalStringAuxFn startPos
|
||||
|
||||
partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
if quoteDepth ≥ 3 then
|
||||
s
|
||||
else
|
||||
s.mkUnexpectedErrorAt "unterminated multi-line literal string" startPos
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\'' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
if quoteDepth ≥ 5 then
|
||||
s.mkUnexpectedError "too many quotes"
|
||||
else
|
||||
|
|
@ -192,29 +184,28 @@ partial def mlLiteralStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : P
|
|||
else if quoteDepth ≥ 3 then
|
||||
s
|
||||
else if curr == '\n' then
|
||||
mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h)
|
||||
mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h)
|
||||
else if curr == '\r' then
|
||||
(crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' input s.pos h)
|
||||
(crlfAuxFn >> mlLiteralStringAuxFn startPos 0) c (s.next' c s.pos h)
|
||||
else if isControlChar curr then
|
||||
mkUnexpectedCharError s curr
|
||||
else
|
||||
mlLiteralStringAuxFn startPos 0 c (s.next' input s.pos h)
|
||||
mlLiteralStringAuxFn startPos 0 c (s.next' c s.pos h)
|
||||
|
||||
public def mlLiteralStringFn : ParserFn := usePosFn fun startPos =>
|
||||
atomicFn (repeatFn 3 (chFn '\'' ["multi-line literal string"])) >>
|
||||
mlLiteralStringAuxFn startPos 0
|
||||
|
||||
partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
if quoteDepth ≥ 3 then
|
||||
s
|
||||
else
|
||||
s.mkUnexpectedErrorAt "unterminated multi-line basic string" startPos
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '\"' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
if quoteDepth ≥ 5 then
|
||||
s.mkUnexpectedError "too many quotes"
|
||||
else
|
||||
|
|
@ -222,15 +213,15 @@ partial def mlBasicStringAuxFn (startPos : String.Pos) (quoteDepth : Nat) : Pars
|
|||
else if quoteDepth ≥ 3 then
|
||||
s
|
||||
else if curr == '\n' then
|
||||
mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h)
|
||||
mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h)
|
||||
else if curr == '\r' then
|
||||
(crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h)
|
||||
(crlfAuxFn >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h)
|
||||
else if curr == '\\' then
|
||||
(escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' input s.pos h)
|
||||
(escapeSeqFn true >> mlBasicStringAuxFn startPos 0) c (s.next' c s.pos h)
|
||||
else if isControlChar curr then
|
||||
mkUnexpectedCharError s curr
|
||||
else
|
||||
mlBasicStringAuxFn startPos 0 c (s.next' input s.pos h)
|
||||
mlBasicStringAuxFn startPos 0 c (s.next' c s.pos h)
|
||||
|
||||
public def mlBasicStringFn : ParserFn := usePosFn fun startPos =>
|
||||
atomicFn (repeatFn 3 (chFn '\"' ["multi-line basic string"])) >>
|
||||
|
|
@ -244,19 +235,18 @@ def hourMinFn : ParserFn :=
|
|||
digitPairFn ["hour digit"] >> chFn ':' >> digitPairFn ["minute digit"]
|
||||
|
||||
def timeTailFn (allowOffset : Bool) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr = '.' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
let s := takeWhile1Fn (·.isDigit) ["millisecond"] c s
|
||||
if s.hasError then s else
|
||||
if h : input.atEnd s.pos then s else
|
||||
timeOffsetFn (input.get' s.pos h) (input.next' s.pos h) c s
|
||||
if h : c.atEnd s.pos then s else
|
||||
timeOffsetFn (c.get' s.pos h) (c.next' s.pos h) c s
|
||||
else
|
||||
timeOffsetFn curr (input.next' s.pos h) c s
|
||||
timeOffsetFn curr (c.next' s.pos h) c s
|
||||
where
|
||||
@[inline] timeOffsetFn curr nextPos c s :=
|
||||
if curr == 'Z' || curr == 'z' then
|
||||
|
|
@ -277,15 +267,14 @@ public def timeFn (allowOffset := false) : ParserFn :=
|
|||
|
||||
def optTimeFn : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
let input := c.input
|
||||
if h : input.atEnd i then
|
||||
if h : c.atEnd i then
|
||||
s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr = 'T' || curr = 't' then
|
||||
timeFn true c (s.next' input i h)
|
||||
timeFn true c (s.next' c i h)
|
||||
else if curr = ' ' then
|
||||
let tPos := input.next' i h
|
||||
let tPos := c.next' i h
|
||||
let s := timeFn true c (s.setPos tPos)
|
||||
if s.hasError && s.pos == tPos then s.restore (s.stackSize-1) i else s
|
||||
else
|
||||
|
|
@ -299,29 +288,27 @@ public def dateTimeFn : ParserFn :=
|
|||
repeatFn 4 (digitFn ["year digit"]) >> chFn '-' >> dateTimeAuxFn
|
||||
|
||||
def decExpFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let expected := ["decimal exponent"]
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr = '-' || curr == '+' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
sepByChar1Fn (·.isDigit) '_' expected c s
|
||||
else if curr.isDigit then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
sepByChar1AuxFn (·.isDigit) '_' expected c s
|
||||
else
|
||||
mkUnexpectedCharError s curr expected
|
||||
|
||||
def optDecExpFn : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == 'e' || curr == 'E' then
|
||||
decExpFn c (s.next' input s.pos h)
|
||||
decExpFn c (s.next' c s.pos h)
|
||||
else
|
||||
s
|
||||
|
||||
|
|
@ -342,11 +329,10 @@ def decNumberTailAuxFn (startPos : String.Pos) (curr : Char) (nextPos : String.P
|
|||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
|
||||
def decNumberTailFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
decNumberTailAuxFn startPos (input.get' s.pos h) (input.next' s.pos h) c s
|
||||
decNumberTailAuxFn startPos (c.get' s.pos h) (c.next' s.pos h) c s
|
||||
|
||||
mutual
|
||||
|
||||
|
|
@ -358,27 +344,25 @@ partial def decNumberSepFn (startPos : String.Pos) (curr : Char) (nextPos : Stri
|
|||
decNumberTailAuxFn startPos curr nextPos c s
|
||||
|
||||
partial def decNumberAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr.isDigit then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
decNumberAuxFn startPos c s
|
||||
else
|
||||
decNumberSepFn startPos curr (input.next' s.pos h) c s
|
||||
decNumberSepFn startPos curr (c.next' s.pos h) c s
|
||||
|
||||
partial def decNumberFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
let input := c.input
|
||||
let expected := ["decimal integer", "float"]
|
||||
if h : input.atEnd i then
|
||||
if h : c.atEnd i then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if curr.isDigit then
|
||||
let s := s.next' input i h
|
||||
let s := s.next' c i h
|
||||
decNumberAuxFn startPos c s
|
||||
else
|
||||
mkUnexpectedCharError s curr expected
|
||||
|
|
@ -392,37 +376,35 @@ def nanAuxFn (startPos : String.Pos) : ParserFn :=
|
|||
strFn "an" ["'nan'"] >> pushLit `Lake.Toml.float startPos
|
||||
|
||||
def decimalFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let expected := ["decimal integer", "float"]
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '0' then
|
||||
decNumberTailFn startPos c (s.next' input s.pos h)
|
||||
decNumberTailFn startPos c (s.next' c s.pos h)
|
||||
else if curr.isDigit then
|
||||
decNumberAuxFn startPos c (s.next' input s.pos h)
|
||||
decNumberAuxFn startPos c (s.next' c s.pos h)
|
||||
else if curr == 'i' then
|
||||
infAuxFn startPos c (s.next' input s.pos h)
|
||||
infAuxFn startPos c (s.next' c s.pos h)
|
||||
else if curr == 'n' then
|
||||
nanAuxFn startPos c (s.next' input s.pos h)
|
||||
nanAuxFn startPos c (s.next' c s.pos h)
|
||||
else
|
||||
mkUnexpectedCharError s curr expected
|
||||
|
||||
def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError ["decimal integer", "float", "date-time"]
|
||||
else -- `NN`
|
||||
let curr := input.get' s.pos h
|
||||
let nextPos := input.next' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
let nextPos := c.next' s.pos h
|
||||
if curr.isDigit then
|
||||
let s := s.setPos nextPos
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let nextPos := input.next' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
let nextPos := c.next' s.pos h
|
||||
if curr == ':' then -- `HH:`
|
||||
let s := s.setPos nextPos
|
||||
let s := timeAuxFn false c s
|
||||
|
|
@ -430,18 +412,18 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
|||
pushLit `Lake.Toml.dateTime startPos skipFn c s
|
||||
else if curr.isDigit then -- `NNN`
|
||||
let s := s.setPos nextPos
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let nextPos := input.next' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
let nextPos := c.next' s.pos h
|
||||
if curr.isDigit then -- `NNNN`
|
||||
let s := s.setPos nextPos
|
||||
if h : input.atEnd nextPos then
|
||||
if h : c.atEnd nextPos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let nextPos := input.next' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
let nextPos := c.next' s.pos h
|
||||
if curr == '-' then -- `YYYY-`
|
||||
let s := s.setPos nextPos
|
||||
let s := dateTimeAuxFn c s
|
||||
|
|
@ -460,49 +442,48 @@ def decNumeralAuxFn (startPos : String.Pos) : ParserFn := fun c s =>
|
|||
decNumberSepFn startPos curr nextPos c s
|
||||
|
||||
public def numeralFn : ParserFn := atomicFn fun c s =>
|
||||
let input := c.input
|
||||
let startPos := s.pos
|
||||
let expected := ["integer", "float", "date-time"]
|
||||
if h : input.atEnd s.pos then
|
||||
if h : c.atEnd s.pos then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == '0' then
|
||||
let s := s.next' input startPos h
|
||||
if h : input.atEnd s.pos then
|
||||
let s := s.next' c startPos h
|
||||
if h : c.atEnd s.pos then
|
||||
pushLit `Lake.Toml.decInt startPos skipFn c s
|
||||
else
|
||||
let curr := input.get' s.pos h
|
||||
let curr := c.get' s.pos h
|
||||
if curr == 'b' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
let s := sepByChar1Fn isBinDigit '_' ["binary integer"] c s
|
||||
if s.hasError then s else
|
||||
pushLit `Lake.Toml.binNum startPos skipFn c s
|
||||
else if curr == 'o' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
let s := sepByChar1Fn isOctDigit '_' ["octal integer"] c s
|
||||
if s.hasError then s else
|
||||
pushLit `Lake.Toml.octNum startPos skipFn c s
|
||||
else if curr == 'x' then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
let s := sepByChar1Fn isHexDigit '_' ["hexadecimal integer"] c s
|
||||
if s.hasError then s else
|
||||
pushLit `Lake.Toml.hexNum startPos skipFn c s
|
||||
else if curr.isDigit then
|
||||
let s := s.next' input s.pos h
|
||||
let s := s.next' c s.pos h
|
||||
let s := (chFn ':' >> timeAuxFn false) c s
|
||||
if s.hasError then s else
|
||||
pushLit `Lake.Toml.dateTime startPos skipFn c s
|
||||
else
|
||||
decNumberTailAuxFn startPos curr (input.next' s.pos h) c s
|
||||
decNumberTailAuxFn startPos curr (c.next' s.pos h) c s
|
||||
else if curr.isDigit then
|
||||
decNumeralAuxFn startPos c (s.next' input s.pos h)
|
||||
decNumeralAuxFn startPos c (s.next' c s.pos h)
|
||||
else if curr == '+' || curr == '-' then
|
||||
decimalFn startPos c (s.next' input s.pos h)
|
||||
decimalFn startPos c (s.next' c s.pos h)
|
||||
else if curr == 'i' then
|
||||
infAuxFn startPos c (s.next' input s.pos h)
|
||||
infAuxFn startPos c (s.next' c s.pos h)
|
||||
else if curr == 'n' then
|
||||
nanAuxFn startPos c (s.next' input s.pos h)
|
||||
nanAuxFn startPos c (s.next' c s.pos h)
|
||||
else
|
||||
s.mkUnexpectedError s!"unexpected '{curr}'" expected
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ public def loadToml (ictx : InputContext) : EIO MessageLog Table := do
|
|||
| .ok env => pure env
|
||||
| .error e => throw <| MessageLog.empty.add <| mkMessageNoPos ictx <|
|
||||
m!"failed to initialize TOML environment: {e}"
|
||||
let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.input)
|
||||
let s := toml.fn.run ictx { env, options := {} } {} (mkParserState ictx.inputString)
|
||||
if let some errorMsg := s.errorMsg then
|
||||
throw <| MessageLog.empty.add <| mkParserErrorMessage ictx s errorMsg
|
||||
else if ictx.input.atEnd s.pos then
|
||||
else if ictx.atEnd s.pos then
|
||||
let act := elabToml ⟨s.stxStack.back⟩
|
||||
match (← act.run {fileName := ictx.fileName, fileMap := ictx.fileMap} {env} |>.toBaseIO) with
|
||||
| .ok (t, s) =>
|
||||
|
|
|
|||
|
|
@ -59,12 +59,12 @@ public def mkUnexpectedCharError (s : ParserState) (c : Char) (expected : List
|
|||
|
||||
@[inline] public def satisfyFn (p : Char → Bool) (expected : List String := []) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
if h : c.input.atEnd i then
|
||||
if h : c.atEnd i then
|
||||
s.mkEOIError expected
|
||||
else
|
||||
let curr := c.input.get' i h
|
||||
let curr := c.get' i h
|
||||
if p curr then
|
||||
s.next' c.input i h
|
||||
s.next' c i h
|
||||
else
|
||||
mkUnexpectedCharError s curr expected
|
||||
|
||||
|
|
@ -98,26 +98,24 @@ mutual
|
|||
|
||||
public partial def sepByChar1AuxFn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
let input := c.input
|
||||
if h : input.atEnd i then
|
||||
if h : c.atEnd i then
|
||||
s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let curr := c.get' i h
|
||||
if p curr then
|
||||
sepByChar1AuxFn p sep expected c (s.next' input i h)
|
||||
sepByChar1AuxFn p sep expected c (s.next' c i h)
|
||||
else if curr == sep then
|
||||
sepByChar1Fn p sep expected c (s.next' input i h)
|
||||
sepByChar1Fn p sep expected c (s.next' c i h)
|
||||
else
|
||||
s
|
||||
|
||||
public partial def sepByChar1Fn (p : Char → Bool) (sep : Char) (expected : List String := []) : ParserFn := fun c s =>
|
||||
let i := s.pos
|
||||
let input := c.input
|
||||
if h : input.atEnd i then
|
||||
if h : c.atEnd i then
|
||||
s
|
||||
else
|
||||
let curr := input.get' i h
|
||||
let s := s.next' input i h
|
||||
let curr := c.get' i h
|
||||
let s := s.next' c i h
|
||||
if p curr then
|
||||
sepByChar1AuxFn p sep expected c s
|
||||
else if curr == sep then
|
||||
|
|
@ -129,13 +127,12 @@ end
|
|||
|
||||
/-- Push a new atom onto the syntax stack. -/
|
||||
public def pushAtom (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let stopPos := s.pos
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let val := input.extract startPos stopPos
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let val := c.extract startPos stopPos
|
||||
let s := trailingFn c s
|
||||
let stopPos' := s.pos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := stopPos' : Substring }
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := stopPos')
|
||||
let atom := mkAtom (SourceInfo.original leading startPos trailing (startPos + val)) val
|
||||
s.pushSyntax atom
|
||||
|
||||
|
|
@ -188,13 +185,12 @@ public def strAtom.parenthesizer (_ : String) (_ : List String) (_ : ParserFn)
|
|||
|
||||
/-- Push `(Syntax.node kind <new-atom>)` onto the syntax stack. -/
|
||||
public def pushLit (kind : SyntaxNodeKind) (startPos : String.Pos) (trailingFn := skipFn) : ParserFn := fun c s =>
|
||||
let input := c.input
|
||||
let stopPos := s.pos
|
||||
let leading := mkEmptySubstringAt input startPos
|
||||
let val := input.extract startPos stopPos
|
||||
let leading := c.mkEmptySubstringAt startPos
|
||||
let val := c.extract startPos stopPos
|
||||
let s := trailingFn c s
|
||||
let wsStopPos := s.pos
|
||||
let trailing := { str := input, startPos := stopPos, stopPos := wsStopPos : Substring }
|
||||
let trailing := c.substring (startPos := stopPos) (stopPos := wsStopPos)
|
||||
let info := SourceInfo.original leading startPos trailing stopPos
|
||||
s.pushSyntax (Syntax.mkLit kind val info)
|
||||
|
||||
|
|
|
|||
59
tests/lean/run/parseEnd.lean
Normal file
59
tests/lean/run/parseEnd.lean
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import Lean.Elab.Command
|
||||
|
||||
open Lean Elab Command Parser
|
||||
|
||||
declare_syntax_cat balanced
|
||||
syntax "!" : balanced
|
||||
syntax "(" balanced* ")" : balanced
|
||||
|
||||
|
||||
elab d:docComment "test" : command => do
|
||||
let some ⟨pos, endPos⟩ := d.raw[1].getRange? (canonicalOnly := true)
|
||||
| throwErrorAt d "Docstring doesn't have a canonical position"
|
||||
let p := Parser.categoryParser `balanced 0
|
||||
let p := andthenFn whitespace p.fn
|
||||
let text ← getFileMap
|
||||
let input := text.source
|
||||
|
||||
let endPos := input.prev <| input.prev endPos
|
||||
|
||||
if h : endPos ≤ input.endPos then
|
||||
let ictx := mkInputContext input (← getFileName) (endPos := endPos) (endPos_valid := h)
|
||||
let env ← getEnv
|
||||
let s := { mkParserState input with pos }
|
||||
let s := p.run ictx { env, options := {} } (getTokenTable env) s
|
||||
if !s.allErrors.isEmpty then
|
||||
for (pos, _, err) in s.allErrors do
|
||||
logMessage {
|
||||
fileName := (← getFileName )
|
||||
pos := text.toPosition pos
|
||||
endPos := some (text.toPosition endPos)
|
||||
data := s!"{err.toString}"
|
||||
}
|
||||
else if ictx.atEnd s.pos then
|
||||
logInfo s.stxStack.back
|
||||
else
|
||||
let pos := s.pos
|
||||
logMessage {
|
||||
fileName := (← getFileName )
|
||||
pos := text.toPosition pos
|
||||
endPos := some (text.toPosition endPos)
|
||||
data := s!"expected end of input"
|
||||
}
|
||||
else
|
||||
throwError "Out of bounds"
|
||||
|
||||
|
||||
/-- info: (!!(!)) -/
|
||||
#guard_msgs in
|
||||
/--
|
||||
( ! ! (!) )
|
||||
-/
|
||||
test
|
||||
|
||||
/-- error: unexpected end of input; expected ')' -/
|
||||
#guard_msgs in
|
||||
/--
|
||||
(
|
||||
-/
|
||||
test
|
||||
Loading…
Add table
Reference in a new issue