refactor: do not use String.Iterator (#11127)

This PR removes all uses of `String.Iterator` from core, preferring
`String.ValidPos` instead.

In an upcoming PR, `String.Iterator` will be renamed to
`String.Legacy.Iterator`.
This commit is contained in:
Markus Himmel 2025-11-11 12:46:58 +01:00 committed by GitHub
parent d1e19f2aa0
commit 2c2fcff4f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 448 additions and 235 deletions

View file

@ -11,6 +11,7 @@ public import Init.Data.String.Bootstrap
public import Init.Data.String.Decode
public import Init.Data.String.Defs
public import Init.Data.String.Extra
public import Init.Data.String.Iterator
public import Init.Data.String.Lemmas
public import Init.Data.String.Repr
public import Init.Data.String.Bootstrap

View file

@ -1756,7 +1756,11 @@ theorem Slice.Pos.prev_lt {s : Slice} {p : s.Pos} {h} : p.prev h < p :=
theorem ValidPos.prev_lt {s : String} {p : s.ValidPos} {h} : p.prev h < p := by
simp [← toSlice_lt, toSlice_prev]
/-- Advances the position `p` `n` times, saturating at `s.endPos` if necessary. -/
/--
Advances the position `p` `n` times.
If this would move `p` past the end of `s`, the result is `s.endPos`.
-/
def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
match n with
| 0 => p
@ -1766,7 +1770,11 @@ def Slice.Pos.nextn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
else
p
/-- Iterates `p.prev` `n` times, saturating at `s.startPos` if necessary. -/
/--
Iterates `p.prev` `n` times.
If this would move `p` past the start of `s`, the result is `s.endPos`.
-/
def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
match n with
| 0 => p
@ -1776,6 +1784,24 @@ def Slice.Pos.prevn {s : Slice} (p : s.Pos) (n : Nat) : s.Pos :=
else
p
/--
Advances the position `p` `n` times.
If this would move `p` past the end of `s`, the result is `s.endValidPos`.
-/
@[inline]
def ValidPos.nextn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
(p.toSlice.nextn n).ofSlice
/--
Iterates `p.prev` `n` times.
If this would move `p` past the start of `s`, the result is `s.startValidPos`.
-/
@[inline]
def ValidPos.prevn {s : String} (p : s.ValidPos) (n : Nat) : s.ValidPos :=
(p.toSlice.prevn n).ofSlice
@[expose]
def Pos.Raw.utf8GetAux : List Char → Pos.Raw → Pos.Raw → Char
| [], _, _ => default
@ -1905,6 +1931,16 @@ theorem ValidPos.byteIdx_lt_utf8ByteSize {s : String} (p : s.ValidPos) (h : p
theorem ValidPos.lt_next {s : String} (p : s.ValidPos) {h} : p < p.next h := by
simp [← ValidPos.toSlice_lt, toSlice_next]
theorem ValidPos.ne_startPos_of_lt {s : String} {p q : s.ValidPos} :
p < q → q ≠ s.startValidPos := by
simp only [lt_iff, Pos.Raw.lt_iff, ne_eq, ValidPos.ext_iff, offset_startValidPos, Pos.Raw.ext_iff,
Pos.Raw.byteIdx_zero]
omega
theorem ValidPos.next_ne_startValidPos {s : String} {p : s.ValidPos} {h} :
p.next h ≠ s.startValidPos :=
ne_startPos_of_lt p.lt_next
@[simp]
theorem ValidPos.str_toSlice {s : String} {p : s.ValidPos} : p.toSlice.str = p := by
ext
@ -1955,11 +1991,32 @@ theorem endExclusive_replaceStart {s : String} {p : s.ValidPos} :
(s.replaceStart p).endExclusive = s.endValidPos := by
simp [replaceStart]
@[simp]
theorem utf8ByteSize_toSlice {s : String} : s.toSlice.utf8ByteSize = s.utf8ByteSize := by
simp [Slice.utf8ByteSize_eq]
@[simp]
theorem utf8ByteSize_replaceStart {s : String} {p : s.ValidPos} :
(s.replaceStart p).utf8ByteSize = s.utf8ByteSize - p.offset.byteIdx := by
simp [replaceStart]
@[simp]
theorem utf8ByteSize_replaceEnd {s : String} {p : s.ValidPos} :
(s.replaceEnd p).utf8ByteSize = p.offset.byteIdx := by
simp [replaceEnd]
theorem Pos.Raw.isValidForSlice_stringReplaceStart {s : String} {p : s.ValidPos} {q : Pos.Raw} :
q.IsValidForSlice (s.replaceStart p) ↔ (q.offsetBy p.offset).IsValid s := by
rw [replaceStart, isValidForSlice_replaceStart, isValidForSlice_toSlice_iff,
ValidPos.offset_toSlice]
/-
Given a string and two valid positions within the string, obtain a slice on the string formed by
the new bounds, or panic if the given end is strictly less than the given start.
-/
def replaceStartEnd! (s : String) (p₁ p₂ : s.ValidPos) : Slice :=
s.toSlice.replaceStartEnd! p₁.toSlice p₂.toSlice
theorem ValidPos.utf8Encode_get_eq_extract {s : String} (pos : s.ValidPos) (h : pos ≠ s.endValidPos) :
List.utf8Encode [pos.get h] = s.bytes.extract pos.offset.byteIdx (pos.offset.byteIdx + (pos.get h).utf8Size) := by
rw [get_eq_get_toSlice, Slice.Pos.utf8Encode_get_eq_extract]
@ -1969,6 +2026,96 @@ theorem ValidPos.eq_copy_replaceEnd_append_get {s : String} {pos : s.ValidPos} (
s = (s.replaceEnd pos).copy ++ singleton (pos.get h) ++ (s.replaceStart (pos.next h)).copy := by
simp [← bytes_inj, utf8Encode_get_eq_extract pos h, Slice.bytes_copy, ← size_bytes]
/-- Given a position in `s.replaceStart p₀`, obtain the corresponding position in `s`. -/
@[inline]
def ValidPos.ofReplaceStart {s : String} {p₀ : s.ValidPos} (pos : (s.replaceStart p₀).Pos) :
s.ValidPos where
offset := pos.offset.offsetBy p₀.offset
isValid := Pos.Raw.isValidForSlice_stringReplaceStart.1 pos.isValidForSlice
@[simp]
theorem ValidPos.offset_ofReplaceStart {s : String} {p₀ : s.ValidPos}
{pos : (s.replaceStart p₀).Pos} : (ofReplaceStart pos).offset = pos.offset.offsetBy p₀.offset :=
(rfl)
/-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
`s.replaceStart p₀`. -/
@[inline]
def ValidPos.toReplaceStart {s : String} (p₀ : s.ValidPos) (pos : s.ValidPos) (h : p₀ ≤ pos) :
(s.replaceStart p₀).Pos where
offset := pos.offset.unoffsetBy p₀.offset
isValidForSlice := Pos.Raw.isValidForSlice_stringReplaceStart.2 (by
simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValid)
@[simp]
theorem ValidPos.offset_toReplaceStart {s : String} {p₀ : s.ValidPos} {pos : s.ValidPos} {h} :
(toReplaceStart p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
@[simp]
theorem ValidPos.ofReplaceStart_startPos {s : String} {pos : s.ValidPos} :
ofReplaceStart (s.replaceStart pos).startPos = pos :=
ValidPos.ext (by simp)
@[simp]
theorem ValidPos.ofReplaceStart_endPos {s : String} {pos : s.ValidPos} :
ofReplaceStart (s.replaceStart pos).endPos = s.endValidPos := by
have := pos.isValid.le_rawEndPos
simp_all [ValidPos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff]
theorem ValidPos.ofReplaceStart_inj {s : String} {p₀ : s.ValidPos}
{pos pos' : (s.replaceStart p₀).Pos} :
ofReplaceStart pos = ofReplaceStart pos' ↔ pos = pos' := by
simp [ValidPos.ext_iff, String.Pos.Raw.ext_iff, Slice.Pos.ext_iff]
theorem ValidPos.get_eq_get_ofReplaceStart {s : String} {p₀ : s.ValidPos}
{pos : (s.replaceStart p₀).Pos} {h} :
pos.get h = (ofReplaceStart pos).get (by rwa [← ofReplaceStart_endPos, ne_eq, ofReplaceStart_inj]) := by
simp [ValidPos.get, Slice.Pos.get]
/-- Given a position in `s.replaceEnd p₀`, obtain the corresponding position in `s`. -/
@[inline]
def ValidPos.ofReplaceEnd {s : String} {p₀ : s.ValidPos} (pos : (s.replaceEnd p₀).Pos) : s.ValidPos where
offset := pos.offset
isValid := (Pos.Raw.isValidForSlice_stringReplaceEnd.1 pos.isValidForSlice).2
@[simp]
theorem ValidPos.offset_ofReplaceEnd {s : String} {p₀ : s.ValidPos} {pos : (s.replaceEnd p₀).Pos} :
(ofReplaceEnd pos).offset = pos.offset := (rfl)
/-- Given a position in `s` that is at most `p₀`, obtain the corresponding position in `s.replaceEnd p₀`. -/
@[inline]
def ValidPos.toReplaceEnd {s : String} (p₀ : s.ValidPos) (pos : s.ValidPos) (h : pos ≤ p₀) :
(s.replaceEnd p₀).Pos where
offset := pos.offset
isValidForSlice := Pos.Raw.isValidForSlice_stringReplaceEnd.2 ⟨h, pos.isValid⟩
@[simp]
theorem ValidPos.offset_toReplaceEnd {s : String} {p₀ : s.ValidPos} {pos : s.ValidPos} {h : pos ≤ p₀} :
(toReplaceEnd p₀ pos h).offset = pos.offset := (rfl)
theorem Slice.Pos.le_nextn {s : Slice} {p : s.Pos} {n : Nat} : p ≤ p.nextn n := by
fun_induction nextn with
| case1 => simp [Slice.Pos.le_iff]
| case2 p n h ih =>
simp only [Pos.le_iff] at *
exact Pos.Raw.le_of_lt (Pos.Raw.lt_of_lt_of_le lt_next ih)
| case3 => simp [Slice.Pos.le_iff]
theorem ValidPos.le_nextn {s : String} {p : s.ValidPos} {n : Nat} :
p ≤ p.nextn n := by
simpa [nextn, ValidPos.le_iff, ← offset_toSlice] using Slice.Pos.le_nextn
theorem Slice.Pos.prevn_le {s : Slice} {p : s.Pos} {n : Nat} : p.prevn n ≤ p := by
fun_induction prevn with
| case1 => simp [le_iff]
| case2 p n h ih =>
simp only [Pos.le_iff] at *
exact Pos.Raw.le_of_lt (Pos.Raw.lt_of_le_of_lt ih prev_lt)
| case3 => simp [le_iff]
theorem ValidPos.prevn_le {s : String} {p : s.ValidPos} {n : Nat} :
p.prevn n ≤ p := by
simpa [nextn, ValidPos.le_iff, ← offset_toSlice] using Slice.Pos.prevn_le
/--
Returns the next position in a string after position `p`. If `p` is not a valid position or
`p = s.endPos`, returns the position one byte after `p`.

View file

@ -589,6 +589,32 @@ instance {s : Slice} (l r : s.Pos) : Decidable (l ≤ r) :=
instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
decidable_of_iff' _ Slice.Pos.lt_iff
/--
`pos.IsAtEnd` is just shorthand for `pos = s.endValidPos` that is easier to write if `s` is long.
-/
abbrev ValidPos.IsAtEnd {s : String} (pos : s.ValidPos) : Prop :=
pos = s.endValidPos
@[simp]
theorem ValidPos.isAtEnd_iff {s : String} {pos : s.ValidPos} :
pos.IsAtEnd ↔ pos = s.endValidPos := Iff.rfl
instance {s : String} {pos : s.ValidPos} : Decidable pos.IsAtEnd :=
decidable_of_iff _ ValidPos.isAtEnd_iff
/--
`pos.IsAtEnd` is just shorthand for `pos = s.endPos` that is easier to write if `s` is long.
-/
abbrev Slice.Pos.IsAtEnd {s : Slice} (pos : s.Pos) : Prop :=
pos = s.endPos
@[simp]
theorem Slice.Pos.isAtEnd_iff {s : Slice} {pos : s.Pos} :
pos.IsAtEnd ↔ pos = s.endPos := Iff.rfl
instance {s : Slice} {pos : s.Pos} : Decidable pos.IsAtEnd :=
decidable_of_iff _ Slice.Pos.isAtEnd_iff
/-- Returns the byte at a position in a slice that is not the end position. -/
@[inline, expose]
def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 :=

View file

@ -9,25 +9,12 @@ prelude
import all Init.Data.ByteArray.Basic
public import Init.Data.String.Basic
import all Init.Data.String.Basic
public import Init.Data.String.Iterator
import all Init.Data.String.Iterator
public import Init.Data.String.Substring
public import Init.Data.String.Modify
import Init.Data.String.Search
public section
namespace Substring
/--
Returns an iterator into the underlying string, at the substring's starting position. The ending
position is discarded, so the iterator alone cannot be used to determine whether its current
position is within the original substring.
-/
@[inline] def toIterator : Substring → String.Iterator
| ⟨s, b, _⟩ => ⟨s, b⟩
end Substring
namespace String
/--
@ -62,89 +49,40 @@ Checks whether an array of bytes is a valid UTF-8 encoding of a string.
abbrev validateUTF8 (a : ByteArray) : Bool :=
a.validateUTF8
theorem Iterator.sizeOf_next_lt_of_hasNext (i : String.Iterator) (h : i.hasNext) : sizeOf i.next < sizeOf i := by
cases i; rename_i s pos; simp [Iterator.next, Iterator.sizeOf_eq]; simp [Iterator.hasNext] at h
exact Nat.sub_lt_sub_left h (String.Pos.Raw.lt_next s pos)
macro_rules
| `(tactic| decreasing_trivial) =>
`(tactic| with_reducible apply String.Iterator.sizeOf_next_lt_of_hasNext; assumption)
theorem Iterator.sizeOf_next_lt_of_atEnd (i : String.Iterator) (h : ¬ i.atEnd = true) : sizeOf i.next < sizeOf i :=
have h : i.hasNext := decide_eq_true <| Nat.gt_of_not_le <| mt decide_eq_true h
sizeOf_next_lt_of_hasNext i h
macro_rules
| `(tactic| decreasing_trivial) =>
`(tactic| with_reducible apply String.Iterator.sizeOf_next_lt_of_atEnd; assumption)
namespace Iterator
/--
Replaces the current character in the string.
Does nothing if the iterator is at the end of the string. If both the replacement character and the
replaced character are 7-bit ASCII characters and the string is not shared, then it is updated
in-place and not copied.
-/
@[inline] def setCurr : Iterator → Char → Iterator
| ⟨s, i⟩, c => ⟨i.set s c, i⟩
/--
Moves the iterator forward until the Boolean predicate `p` returns `true` for the iterator's current
character or until the end of the string is reached. Does nothing if the current character already
satisfies `p`.
-/
@[specialize] def find (it : Iterator) (p : Char → Bool) : Iterator :=
if it.atEnd then it
else if p it.curr then it
else find it.next p
/--
Iterates over a string, updating a state at each character using the provided function `f`, until
`f` returns `none`. Begins with the state `init`. Returns the state and character for which `f`
returns `none`.
-/
@[specialize] def foldUntil (it : Iterator) (init : α) (f : α → Char → Option α) : α × Iterator :=
if it.atEnd then
(init, it)
else if let some a := f init it.curr then
foldUntil it.next a f
else
(init, it)
end Iterator
private def findLeadingSpacesSize (s : String) : Nat :=
let it := s.iter
let it := it.find (· == '\n') |>.next
consumeSpaces it 0 s.length
let it := s.startValidPos
let it := it.find? (· == '\n') |>.bind String.ValidPos.next?
match it with
| some it => consumeSpaces it 0 s.length
| none => 0
where
consumeSpaces (it : String.Iterator) (curr min : Nat) : Nat :=
if it.atEnd then min
else if it.curr == ' ' || it.curr == '\t' then consumeSpaces it.next (curr + 1) min
else if it.curr == '\n' then findNextLine it.next min
else findNextLine it.next (Nat.min curr min)
findNextLine (it : String.Iterator) (min : Nat) : Nat :=
if it.atEnd then min
else if it.curr == '\n' then consumeSpaces it.next 0 min
else findNextLine it.next min
consumeSpaces {s : String} (it : s.ValidPos) (curr min : Nat) : Nat :=
if h : it.IsAtEnd then min
else if it.get h == ' ' || it.get h == '\t' then consumeSpaces (it.next h) (curr + 1) min
else if it.get h == '\n' then findNextLine (it.next h) min
else findNextLine (it.next h) (Nat.min curr min)
termination_by it
findNextLine {s : String} (it : s.ValidPos) (min : Nat) : Nat :=
if h : it.IsAtEnd then min
else if it.get h == '\n' then consumeSpaces (it.next h) 0 min
else findNextLine (it.next h) min
termination_by it
private def removeNumLeadingSpaces (n : Nat) (s : String) : String :=
consumeSpaces n s.iter ""
consumeSpaces n s.startValidPos ""
where
consumeSpaces (n : Nat) (it : String.Iterator) (r : String) : String :=
consumeSpaces (n : Nat) {s : String} (it : s.ValidPos) (r : String) : String :=
match n with
| 0 => saveLine it r
| n+1 =>
if it.atEnd then r
else if it.curr == ' ' || it.curr == '\t' then consumeSpaces n it.next r
if h : it.IsAtEnd then r
else if it.get h == ' ' || it.get h == '\t' then consumeSpaces n (it.next h) r
else saveLine it r
termination_by (it, 1)
saveLine (it : String.Iterator) (r : String) : String :=
if it.atEnd then r
else if it.curr == '\n' then consumeSpaces n it.next (r.push '\n')
else saveLine it.next (r.push it.curr)
saveLine {s : String} (it : s.ValidPos) (r : String) : String :=
if h : it.IsAtEnd then r
else if it.get h == '\n' then consumeSpaces n (it.next h) (r.push '\n')
else saveLine (it.next h) (r.push (it.get h))
termination_by (it, 0)
/--

View file

@ -7,6 +7,7 @@ module
prelude
public import Init.Data.String.Basic
public import Init.Data.String.Modify
/-!
# `String.Iterator`
@ -191,6 +192,73 @@ def prevn : Iterator → Nat → Iterator
| it, 0 => it
| it, i+1 => prevn it.prev i
theorem sizeOf_next_lt_of_hasNext (i : String.Iterator) (h : i.hasNext) : sizeOf i.next < sizeOf i := by
cases i; rename_i s pos; simp [Iterator.next, Iterator.sizeOf_eq]; simp [Iterator.hasNext] at h
exact Nat.sub_lt_sub_left h (String.Pos.Raw.lt_next s pos)
macro_rules
| `(tactic| decreasing_trivial) =>
`(tactic| with_reducible apply String.Iterator.sizeOf_next_lt_of_hasNext; assumption)
theorem sizeOf_next_lt_of_atEnd (i : String.Iterator) (h : ¬ i.atEnd = true) : sizeOf i.next < sizeOf i :=
have h : i.hasNext := decide_eq_true <| Nat.gt_of_not_le <| mt decide_eq_true h
sizeOf_next_lt_of_hasNext i h
macro_rules
| `(tactic| decreasing_trivial) =>
`(tactic| with_reducible apply String.Iterator.sizeOf_next_lt_of_atEnd; assumption)
/--
Replaces the current character in the string.
Does nothing if the iterator is at the end of the string. If both the replacement character and the
replaced character are 7-bit ASCII characters and the string is not shared, then it is updated
in-place and not copied.
-/
@[inline] def setCurr : Iterator → Char → Iterator
| ⟨s, i⟩, c => ⟨i.set s c, i⟩
/--
Moves the iterator forward until the Boolean predicate `p` returns `true` for the iterator's current
character or until the end of the string is reached. Does nothing if the current character already
satisfies `p`.
-/
@[specialize] def find (it : Iterator) (p : Char → Bool) : Iterator :=
if it.atEnd then it
else if p it.curr then it
else find it.next p
/--
Iterates over a string, updating a state at each character using the provided function `f`, until
`f` returns `none`. Begins with the state `init`. Returns the state and character for which `f`
returns `none`.
-/
@[specialize] def foldUntil (it : Iterator) (init : α) (f : α → Char → Option α) : α × Iterator :=
if it.atEnd then
(init, it)
else if let some a := f init it.curr then
foldUntil it.next a f
else
(init, it)
end Iterator
end String
namespace Substring
/--
Returns an iterator into the underlying string, at the substring's starting position. The ending
position is discarded, so the iterator alone cannot be used to determine whether its current
position is within the original substring.
-/
@[inline] def toIterator : Substring → String.Iterator
| ⟨s, b, _⟩ => ⟨s, b⟩
end Substring
instance : Repr String.Iterator where
reprPrec | ⟨s, pos⟩, prec => Repr.addAppParen ("String.Iterator.mk " ++ reprArg s ++ " " ++ reprArg pos) prec
instance : ToString String.Iterator :=
⟨fun it => it.remainingToString⟩

View file

@ -7,16 +7,9 @@ module
prelude
public import Init.Data.String.Substring
public import Init.Data.String.Iterator
public section
instance : Repr String.Iterator where
reprPrec | ⟨s, pos⟩, prec => Repr.addAppParen ("String.Iterator.mk " ++ reprArg s ++ " " ++ reprArg pos) prec
instance : ToString String.Iterator :=
⟨fun it => it.remainingToString⟩
/--
Interprets a string as the decimal representation of an integer, returning it. Returns `none` if
the string does not contain a decimal integer.

View file

@ -51,6 +51,51 @@ def replace [ToForwardSearcher ρ σ] [ToSlice α] (s : String) (pattern : ρ)
(replacement : α) : String :=
s.toSlice.replace pattern replacement
/--
Finds the position of the first match of the pattern {name}`pattern` in after the position
{name}`pos`. If there is no match {name}`none` is returned.
This function is generic over all currently supported patterns.
Examples:
* {lean}`("coffee tea water".toSlice.startPos.find? Char.isWhitespace).map (·.get!) == some ' '`
* {lean}`("tea".toSlice.pos ⟨1⟩ (by decide)).find? (fun (c : Char) => c == 't') == none`
-/
@[inline]
def Slice.Pos.find? [ToForwardSearcher ρ σ] {s : Slice} (pos : s.Pos) (pattern : ρ) :
Option s.Pos :=
((s.replaceStart pos).find? pattern).map ofReplaceStart
/--
Finds the position of the first match of the pattern {name}`pattern` in after the position
{name}`pos`. If there is no match {name}`none` is returned.
This function is generic over all currently supported patterns.
Examples:
* {lean}`("coffee tea water".startValidPos.find? Char.isWhitespace).map (·.get!) == some ' '`
* {lean}`("tea".pos ⟨1⟩ (by decide)).find? (fun (c : Char) => c == 't') == none`
-/
@[inline]
def ValidPos.find? [ToForwardSearcher ρ σ] {s : String} (pos : s.ValidPos)
(pattern : ρ) : Option s.ValidPos :=
(pos.toSlice.find? pattern).map (·.ofSlice)
/--
Finds the position of the first match of the pattern {name}`pattern` in a string {name}`s`. If
there is no match {name}`none` is returned.
This function is generic over all currently supported patterns.
Examples:
* {lean}`("coffee tea water".find? Char.isWhitespace).map (·.get!) == some ' '`
* {lean}`"tea".find? (fun (c : Char) => c == 'X') == none`
* {lean}`("coffee tea water".find? "tea").map (·.get!) == some 't'`
-/
@[inline]
def find? [ToForwardSearcher ρ σ] (s : String) (pattern : ρ) : Option s.ValidPos :=
s.startValidPos.find? pattern
end
end String

View file

@ -478,7 +478,7 @@ where
s.replaceEnd curr
/--
Finds the position of the first match of the pattern {name}`pat` in a slice {name}`true`. If there
Finds the position of the first match of the pattern {name}`pat` in a slice {name}`s`. If there
is no match {name}`none` is returned.
This function is generic over all currently supported patterns.

View file

@ -7,7 +7,6 @@ module
prelude
public import Init.Data.String.Basic
import Init.Data.String.Iterator
import Init.Data.String.Modify
public section
@ -93,7 +92,7 @@ An absolute path starts at the root directory or a drive letter. Accessing files
path does not depend on the current working directory.
-/
def isAbsolute (p : FilePath) : Bool :=
pathSeparators.contains p.toString.front || (isWindows && p.toString.length > 1 && p.toString.iter.next.curr == ':')
pathSeparators.contains p.toString.front || (isWindows && p.toString.length > 1 && p.toString.startValidPos.next?.bind (·.get?) == some ':')
/--
A relative path is one that depends on the current working directory for interpretation. Relative

View file

@ -76,30 +76,30 @@ builtin_initialize externAttr : ParametricAttribute ExternAttrData ←
def getExternAttrData? (env : Environment) (n : Name) : Option ExternAttrData :=
externAttr.getParam? env n
private def parseOptNum : Nat → String.Iterator → Nat → String.Iterator × Nat
| 0, it, r => (it, r)
| n+1, it, r =>
if !it.hasNext then (it, r)
private def parseOptNum : Nat → (pattern : String) → (it : pattern.ValidPos) → Nat → pattern.ValidPos × Nat
| 0, _ , it, r => (it, r)
| n+1, pattern, it, r =>
if h : it.IsAtEnd then (it, r)
else
let c := it.curr
let c := it.get h
if '0' <= c && c <= '9'
then parseOptNum n it.next (r*10 + (c.toNat - '0'.toNat))
then parseOptNum n pattern (it.next h) (r*10 + (c.toNat - '0'.toNat))
else (it, r)
def expandExternPatternAux (args : List String) : Nat → String.Iterator → String → String
| 0, _, r => r
| i+1, it, r =>
if ¬ it.hasNext then r
else let c := it.curr
if c ≠ '#' then expandExternPatternAux args i it.next (r.push c)
def expandExternPatternAux (args : List String) : Nat → (pattern : String) → (it : pattern.ValidPos) → String → String
| 0, _, _, r => r
| i+1, pattern, it, r =>
if h : it.IsAtEnd then r
else let c := it.get h
if c ≠ '#' then expandExternPatternAux args i pattern (it.next h) (r.push c)
else
let it := it.next
let (it, j) := parseOptNum it.remainingBytes it 0
let it := it.next h
let (it, j) := parseOptNum it.remainingBytes pattern it 0
let j := j-1
expandExternPatternAux args i it (r ++ args.getD j "")
expandExternPatternAux args i pattern it (r ++ args.getD j "")
def expandExternPattern (pattern : String) (args : List String) : String :=
expandExternPatternAux args pattern.length pattern.mkIterator ""
expandExternPatternAux args pattern.length pattern pattern.startValidPos ""
def mkSimpleFnCall (fn : String) (args : List String) : String :=
fn ++ "(" ++ ((args.intersperse ", ").foldl (·++·) "") ++ ")"

View file

@ -4,8 +4,10 @@ Released under Apache 2.0 license as described in the file LICENSE.
Authors: David Thrane Christiansen
-/
module
prelude
public import Init
public import Init.Data.String.Basic
import Init.Data.Vector.Basic
set_option linter.missingDocs true
@ -30,24 +32,24 @@ public def levenshtein (str1 str2 : String) (cutoff : Nat) : Option Nat := Id.ru
for h : i in [0:v0.size] do
v0 := v0.set i i
let mut iter1 := str1.iter
let mut iter1 := str1.startValidPos
let mut i := 0
while h1 : iter1.hasNext do
while h1 : ¬iter1.IsAtEnd do
v1 := v1.set 0 (i+1)
let mut iter2 := str2.iter
let mut iter2 := str2.startValidPos
let mut j : Fin (len2 + 1) := 0
while h2 : iter2.hasNext do
while h2 : ¬iter2.IsAtEnd do
let j' : Fin _ := j + 1
let deletionCost := v0[j'] + 1
let insertionCost := v1[j] + 1
let substCost :=
if iter1.curr' h1 == iter2.curr' h2 then v0[j]
if iter1.get h1 == iter2.get h2 then v0[j]
else v0[j] + 1
let cost := min (min deletionCost insertionCost) substCost
v1 := v1.set j' cost
iter2 := iter2.next' h2
iter2 := iter2.next h2
j := j + 1
iter1 := iter1.next' h1
iter1 := iter1.next h1
i := i + 1
-- Terminate early if it's impossible that the result is below the cutoff
if v1.all (· > cutoff) then return none

View file

@ -8,7 +8,6 @@ module
prelude
public import Lean.Data.Json.FromToJson.Basic
public import Lean.ToExpr
import Init.Data.String.Iterator
public section
@ -106,7 +105,7 @@ def ofPosition (text : FileMap) (pos : Position) : String.Pos.Raw :=
0
else
text.positions.back!
String.Iterator.nextn ⟨text.source, colPos⟩ pos.column |>.pos
(text.source.pos! colPos).nextn pos.column |>.offset
/--
Returns the position of the start of (1-based) line `line`.

View file

@ -9,7 +9,6 @@ module
prelude
public import Lean.Syntax
import Init.Data.String.TakeDrop
import Init.Data.String.Iterator
public section
@ -102,29 +101,31 @@ environment variable. If this environment variable is not set, a manual root pro
built is used (typically this is the version corresponding to the current release). If no such root
is available, the latest version of the manual is used.
-/
def rewriteManualLinksCore (s : String) : BaseIO (Array (Lean.Syntax.Range × String) × String) := do
def rewriteManualLinksCore (s : String) : Id (Array (Lean.Syntax.Range × String) × String) := do
let scheme := "lean-manual://"
let mut out := ""
let mut errors := #[]
let mut iter := s.iter
while h : iter.hasNext do
let c := iter.curr' h
iter := iter.next' h
let mut iter := s.startValidPos
while h : ¬iter.IsAtEnd do
let c := iter.get h
let pre := iter
iter := iter.next h
if !lookingAt scheme iter.prev then
if !lookingAt scheme pre then
out := out.push c
continue
let start := iter.prev.forward scheme.length
let start := pre.nextn scheme.length
let mut iter' := start
while h' : iter'.hasNext do
let c' := iter'.curr' h'
iter' := iter'.next' h'
if urlChar c' && !iter'.atEnd then
while h' : ¬iter'.IsAtEnd do
let c' := iter'.get h'
let pre' := iter'
iter' := iter'.next h'
if urlChar c' && ¬iter'.IsAtEnd then
continue
match rw (start.extract iter'.prev) with
match rw (start.extract pre') with
| .error err =>
errors := errors.push (⟨iter.prev.i, iter'.prev.i⟩, err)
errors := errors.push (⟨pre.offset, pre'.offset⟩, err)
out := out.push c
break
| .ok path =>
@ -153,8 +154,8 @@ where
/--
Returns `true` if `goal` is a prefix of the string at the position pointed to by `iter`.
-/
lookingAt (goal : String) (iter : String.Iterator) : Bool :=
String.Pos.Raw.substrEq iter.s iter.i goal 0 goal.rawEndPos.byteIdx
lookingAt (goal : String) {s : String} (iter : s.ValidPos) : Bool :=
String.Pos.Raw.substrEq s iter.offset goal 0 goal.rawEndPos.byteIdx
/--
Rewrites Lean reference manual links in `docstring` to point at the reference manual.

View file

@ -11,7 +11,6 @@ prelude
import Init.Data.Ord
public import Lean.DocString.Types
public import Init.Data.String.TakeDrop
import Init.Data.String.Iterator
public import Init.Data.String.Search
set_option linter.missingDocs true
@ -143,10 +142,10 @@ public instance : MarkdownBlock i Empty where
private def escape (s : String) : String := Id.run do
let mut s' := ""
let mut iter := s.iter
while h : iter.hasNext do
let c := iter.curr' h
iter := iter.next' h
let mut iter := s.startValidPos
while h : ¬iter.IsAtEnd do
let c := iter.get h
iter := iter.next h
if isSpecial c then
s' := s'.push '\\'
s' := s'.push c
@ -157,10 +156,10 @@ where
private def quoteCode (str : String) : String := Id.run do
let mut longest := 0
let mut current := 0
let mut iter := str.iter
while h : iter.hasNext do
let c := iter.curr' h
iter := iter.next' h
let mut iter := str.startValidPos
while h : ¬iter.IsAtEnd do
let c := iter.get h
iter := iter.next h
if c == '`' then
current := current + 1
else
@ -261,11 +260,11 @@ public instance [MarkdownInline i] : ToMarkdown (Inline i) where
private def quoteCodeBlock (indent : Nat) (str : String) : String := Id.run do
let mut longest := 2
let mut current := 0
let mut iter := str.iter
let mut iter := str.startValidPos
let mut out := ""
while h : iter.hasNext do
let c := iter.curr' h
iter := iter.next' h
while h : ¬iter.IsAtEnd do
let c := iter.get h
iter := iter.next h
if c == '`' then
current := current + 1
else

View file

@ -123,14 +123,14 @@ private def withInfoSyntaxFn (p : ParserFn) (infoP : SourceInfo → ParserFn) :
private def unescapeStr (str : String) : String := Id.run do
let mut out := ""
let mut iter := str.iter
while !iter.atEnd do
let c := iter.curr
iter := iter.next
let mut iter := str.startValidPos
while h : ¬iter.IsAtEnd do
let c := iter.get h
iter := iter.next h
if c == '\\' then
if !iter.atEnd then
out := out.push iter.curr
iter := iter.next
if h : ¬iter.IsAtEnd then
out := out.push (iter.get h)
iter := iter.next h
else
out := out.push c
out
@ -201,18 +201,19 @@ private def onlyBlockOpeners : ParserFn := fun c s =>
let position := c.fileMap.toPosition s.pos
let lineStart := c.fileMap.lineStart position.line
let ok : Bool := Id.run do
let mut iter := {c.inputString.iter with i := lineStart}
while iter.i < s.pos && iter.hasNext && iter.i < c.endPos do
if iter.curr.isDigit then
while iter.curr.isDigit && iter.i < s.pos && iter.hasNext do
iter := iter.next
if !iter.hasNext then return false
else if iter.curr == '.' || iter.curr == ')' then iter := iter.next
else if iter.curr == ' ' then iter := iter.next
else if iter.curr == '>' then iter := iter.next
else if iter.curr == '*' then iter := iter.next
else if iter.curr == '+' then iter := iter.next
else if iter.curr == '-' then iter := iter.next
let mut iter := c.inputString.pos! lineStart
while h : iter.offset < s.pos && ¬iter.IsAtEnd && iter.offset < c.endPos do
have h : ¬iter.IsAtEnd := by simp at h; exact h.1.2
if (iter.get h).isDigit then
while h : ¬iter.IsAtEnd && iter.get!.isDigit && iter.offset < s.pos do
iter := iter.next (by simp at h; exact h.1.1)
if h : iter.IsAtEnd then return false
else if iter.get h == '.' || iter.get h == ')' then iter := iter.next h
else if iter.get h == ' ' then iter := iter.next h
else if iter.get h == '>' then iter := iter.next h
else if iter.get h == '*' then iter := iter.next h
else if iter.get h == '+' then iter := iter.next h
else if iter.get h == '-' then iter := iter.next h
else return false
true
@ -245,14 +246,15 @@ private def pushMissing : ParserFn := fun _c s =>
s.pushSyntax .missing
private def strFn (str : String) : ParserFn := asStringFn <| fun c s =>
let rec go (iter : String.Iterator) (s : ParserState) :=
if iter.atEnd then s
let rec go (iter : str.ValidPos) (s : ParserState) :=
if h : iter.IsAtEnd then s
else
let ch := iter.curr
go iter.next <| satisfyFn (· == ch) ch.toString c s
let ch := iter.get h
go (iter.next h) <| satisfyFn (· == ch) ch.toString c s
termination_by iter
let iniPos := s.pos
let iniSz := s.stxStack.size
let s := go str.iter s
let s := go str.startValidPos s
if s.hasError then s.mkErrorAt s!"'{str}'" iniPos (some iniSz) else s
/--

View file

@ -12,6 +12,7 @@ public import Lean.EnvExtension
public import Lean.DocString.Links
import Init.Data.String.TakeDrop
import Init.Data.String.Extra
import Init.Data.String.Search
public section
@ -93,11 +94,11 @@ where
prior to the next whitespace.
-/
upToWs (nonempty : Bool) : Parser String := fun it =>
let it' := it.find fun c => c.isWhitespace
if nonempty && it'.pos == it.pos then
.error it' (.other "Expected a nonempty string")
let it' := (it.2.find? fun (c : Char) => c.isWhitespace).getD it.1.endValidPos
if nonempty && it' == it.2 then
.error ⟨_, it' (.other "Expected a nonempty string")
else
.success it' (it.extract it')
.success ⟨_, it'⟩ (it.1.replaceStartEnd! it.2 it').copy
/-- Parses a named attribute, and returns its name and value. -/
namedAttr : Parser (String × String) := attempt do

View file

@ -30,11 +30,11 @@ line comment marker.
-/
private def addCommentAt (indent : Nat) (line : String) : String := Id.run do
let s := "".pushn ' ' indent ++ "-- "
let mut iter := line.iter
let mut iter := line.startValidPos
for _i in *...indent do
if h : iter.hasNext then
if iter.curr' h == ' ' then
iter := iter.next' h
if h : ¬iter.IsAtEnd then
if iter.get h == ' ' then
iter := iter.next h
else
-- Non-whitespace found *before* the indentation level. This output should be indented
-- as if it were exactly at the indentation level.
@ -42,7 +42,7 @@ private def addCommentAt (indent : Nat) (line : String) : String := Id.run do
else
-- The line was entirely ' ', and was shorter than the indentation level. No `--` added.
return line
let remaining := iter.remainingToString
let remaining := line.replaceStart iter
if remaining.all (· == ' ') then
return line
else
@ -53,11 +53,11 @@ Splits a string into lines, preserving newline characters.
-/
private def lines (s : String) : Array String := Id.run do
let mut result := #[]
let mut lineStart := s.iter
let mut lineStart := s.startValidPos
let mut iter := lineStart
while h : iter.hasNext do
let c := iter.curr' h
iter := iter.next' h
while h : ¬iter.IsAtEnd do
let c := iter.get h
iter := iter.next h
if c == '\n' then
result := result.push <| lineStart.extract iter
lineStart := iter

View file

@ -84,19 +84,8 @@ inductive SearchControl where
/-- Stop the search through a syntax stack. -/
| stop
private def lineCommentPosition? (s : String) : Option String.Pos.Raw := Id.run do
let mut it := s.mkIterator
while h : it.hasNext do
let pos := it.pos
let c₁ := it.curr' h
it := it.next' h
if c₁ == '-' then
if h' : it.hasNext then
let c₂ := it.curr' h'
it := it.next' h'
if c₂ == '-' then
return some pos
return none
private def lineCommentPosition? (s : String) : Option s.ValidPos :=
s.find? "--"
private def isPositionInLineComment (text : FileMap) (pos : String.Pos.Raw) : Bool := Id.run do
let requestedLineNumber := text.toPosition pos |>.line
@ -105,7 +94,7 @@ private def isPositionInLineComment (text : FileMap) (pos : String.Pos.Raw) : Bo
let line := String.Pos.Raw.extract text.source lineStartPos lineEndPos
let some lineCommentPos := lineCommentPosition? line
| return false
return pos >= lineCommentPos.offsetBy lineStartPos
return pos >= lineCommentPos.offset.offsetBy lineStartPos
open CandidateKind in
def findSignatureHelp? (text : FileMap) (ctx? : Option Lsp.SignatureHelpContext) (cmdStx : Syntax)

View file

@ -7,7 +7,8 @@ module
prelude
public import Std.Internal.Parsec.Basic
public import Init.Data.String.Iterator
public import Init.Data.String.Slice
public import Init.Data.String.Termination
public section
@ -15,34 +16,33 @@ namespace Std.Internal
namespace Parsec
namespace String
instance : Input String.Iterator Char String.Pos.Raw where
pos it := it.pos
next it := it.next
curr it := it.curr
hasNext it := it.hasNext
next' it := it.next'
curr' it := it.curr'
instance : Input (Sigma String.ValidPos) Char String.Pos.Raw where
pos it := it.2.offset
next it := it.1, it.2.next!⟩
curr it := it.2.get!
hasNext it := ¬it.2.IsAtEnd
next' it h := ⟨it.1, it.2.next (by simpa using h)⟩
curr' it h := it.2.get (by simpa using h)
/--
`Parser α` is a parser that consumes a `String` input using a `String.Iterator` and returns a result of type `α`.
`Parser α` is a parser that consumes a `String` input using a `Sigma String.ValidPos` and returns a result of type `α`.
-/
abbrev Parser (α : Type) : Type := Parsec String.Iterator α
abbrev Parser (α : Type) : Type := Parsec (Sigma String.ValidPos) α
/--
Run a `Parser` on a `String`, returns either the result or an error string with offset.
-/
protected def Parser.run (p : Parser α) (s : String) : Except String α :=
match p s.mkIterator with
match p ⟨s, s.startValidPos⟩ with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.pos.byteIdx}: {err}"
| .error it err => Except.error s!"offset {repr it.2.offset.byteIdx}: {err}"
/--
Parses the given string.
-/
def pstring (s : String) : Parser String := fun it =>
let substr := it.extract (it.forward s.length)
if substr = s then
.success (it.forward s.length) substr
if (it.1.replaceStart it.2).startsWith s then
.success ⟨_, it.2.nextn s.length⟩ s
else
.error it (.other s!"expected: {s}")
@ -80,25 +80,26 @@ Convert a byte representing `'0'..'9'` to a `Nat`.
private def digitToNat (b : Char) : Nat := b.toNat - '0'.toNat
@[inline]
private partial def digitsCore (acc : Nat) : Parser Nat := fun it =>
private def digitsCore (acc : Nat) : Parser Nat := fun it =>
/-
With this design instead of combinators we can avoid allocating and branching over .success values
all of the time.
-/
let ⟨res, it⟩ := go it acc
.success it res
let ⟨res, it⟩ := go it.2 acc
.success ⟨_, it res
where
go (it : String.Iterator) (acc : Nat) : (Nat × String.Iterator) :=
if h : it.hasNext then
let candidate := it.curr
go {s : String} (it : s.ValidPos) (acc : Nat) : (Nat × s.ValidPos) :=
if h : ¬it.IsAtEnd then
let candidate := it.get h
if '0' ≤ candidate ∧ candidate ≤ '9' then
let digit := digitToNat candidate
let acc := acc * 10 + digit
go (it.next' h) acc
go (it.next h) acc
else
(acc, it)
else
(acc, it)
termination_by it
/--
Parse one or more ASCII digits into a `Nat`.
@ -126,32 +127,34 @@ def asciiLetter : Parser Char := attempt do
let c ← any
if ('A' ≤ c ∧ c ≤ 'Z') ('a' ≤ c ∧ c ≤ 'z') then return c else fail s!"ASCII letter expected"
private partial def skipWs (it : String.Iterator) : String.Iterator :=
if h : it.hasNext then
let c := it.curr' h
private def skipWs {s : String} (it : s.ValidPos) : s.ValidPos :=
if h : ¬it.IsAtEnd then
let c := it.get h
if c = '\u0009' c = '\u000a' c = '\u000d' c = '\u0020' then
skipWs (it.next' h)
skipWs (it.next h)
else
it
else
it
termination_by it
/--
Skip whitespace: tabs, newlines, carriage returns, and spaces.
-/
@[inline]
def ws : Parser Unit := fun it =>
.success (skipWs it) ()
.success ⟨_, skipWs it.2⟩ ()
/--
Takes a fixed amount of chars from the iterator.
-/
def take (n : Nat) : Parser String := fun it =>
let substr := it.extract (it.forward n)
let right := it.2.nextn n
let substr := String.Slice.mk it.1 it.2 right String.ValidPos.le_nextn |>.copy
if substr.length != n then
.error it .eof
else
.success (it.forward n) substr
.success ⟨_, right⟩ substr
end String
end Parsec

View file

@ -19,7 +19,7 @@ def h : Nat → Nat → Nat
termination_by x y => x
@[macro_inline] -- Error
partial def skipMany (p : Parser α) (it : String.Iterator) : Parser PUnit := do
partial def skipMany (p : Parser α) (it : Sigma String.ValidPos) : Parser PUnit := do
match p it with
| .success it _ => skipMany p it
| .error _ _ => pure ()