refactor: discipline around arithmetic of String.Pos.Raw (#10713)

This PR enforces rules around arithmetic of `String.Pos.Raw`.

Specifically, it adopts the following conventions:

- Byte indices ("ordinals") in strings should be represented using
`String.Pos.Raw`
- Amounts of bytes ("cardinals") in strings should be represented using
`Nat`.

For example, `String.Slice.utf8ByteSize` now returns `Nat` instead of
`String.Pos.Raw`, and there is a new function `String.Slice.rawEndPos`.

Finally, the `HAdd` and `HSub` instances for `String.Pos.Raw` are
reorganized. This is a **breaking change**.

The `HAdd/HSub String.Pos.Raw String.Pos.Raw String.Pos.Raw` instances
have been removed. For the use case of tracking positions relative to
some other position, we instead provide `offsetBy` and `unoffsetBy`
functions. For the use case of advancing/unadvancing a position by an
arbitrary number of bytes, we instead provide `increaseBy` and
`decreaseBy` functions. For
offsetting/unoffsetting/advancing/unadvancing a position `p` by the size
of a string `s` (resp. character `c`), use `s + p`/`p - s`/`p + s`/`p -
s` (resp. `c + p`/`p - c`/`p + c`/`p - c`).
This commit is contained in:
Markus Himmel 2025-10-09 09:47:45 +02:00 committed by GitHub
parent 6f1e932542
commit dca8d6d188
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 356 additions and 194 deletions

View file

@ -110,7 +110,7 @@ where
else
match h : validateUTF8At b i with
| false => false
| true => go fuel (i + (b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)).byteIdx)
| true => go fuel (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h))
?_ ?_
termination_by structural fuel
finally
@ -475,19 +475,25 @@ end
namespace String
instance : HAdd String.Pos.Raw String.Pos.Raw String.Pos.Raw where
hAdd p₁ p₂ := { byteIdx := p₁.byteIdx + p₂.byteIdx }
instance : HSub String.Pos.Raw String String.Pos.Raw where
hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }
instance : HSub String.Pos.Raw String.Pos.Raw String.Pos.Raw where
hSub p₁ p₂ := { byteIdx := p₁.byteIdx - p₂.byteIdx }
instance : HSub String.Pos.Raw Char String.Pos.Raw where
hSub p c := { byteIdx := p.byteIdx - c.utf8Size }
@[export lean_string_pos_sub]
def Pos.Internal.subImpl : String.Pos.Raw → String.Pos.Raw → String.Pos.Raw :=
(· - ·)
fun p₁ p₂ => ⟨p₁.byteIdx - p₂.byteIdx⟩
instance : HAdd String.Pos.Raw Char String.Pos.Raw where
hAdd p c := { byteIdx := p.byteIdx + c.utf8Size }
instance : HAdd Char String.Pos.Raw String.Pos.Raw where
hAdd c p := { byteIdx := c.utf8Size + p.byteIdx }
instance : HAdd String String.Pos.Raw String.Pos.Raw where
hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
instance : HAdd String.Pos.Raw String String.Pos.Raw where
hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
@ -713,17 +719,36 @@ theorem Pos.Raw.isValid_singleton {c : Char} {p : Pos.Raw} :
· exact ⟨0, by simp⟩
· exact ⟨1, by simp [hi, ← singleton_eq_asString]⟩
@[simp]
theorem Pos.Raw.byteIdx_sub {p₁ p₂ : Pos.Raw} : (p₁ - p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
/--
Returns the size of the byte slice delineated by the positions `lo` and `hi`, that is, the number
of bytes between `lo` and `hi`.

The result is computed by truncating subtraction on `Nat`, so it is `0` whenever `hi` is not
after `lo`.
-/
@[expose, inline]
def Pos.Raw.byteDistance (lo hi : Pos.Raw) : Nat :=
hi.byteIdx - lo.byteIdx
theorem Pos.Raw.byteDistance_eq {lo hi : Pos.Raw} : lo.byteDistance hi = hi.byteIdx - lo.byteIdx :=
(rfl)
@[simp]
theorem Pos.Raw.byteIdx_add {p₁ p₂ : Pos.Raw} : (p₁ + p₂).byteIdx = p₁.byteIdx + p₂.byteIdx := rfl
theorem Pos.Raw.byteIdx_sub_char {p : Pos.Raw} {c : Char} : (p - c).byteIdx = p.byteIdx - c.utf8Size := rfl
@[simp]
theorem Pos.Raw.byteIdx_addChar {p : Pos.Raw} {c : Char} : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
theorem Pos.Raw.byteIdx_sub_string {p : Pos.Raw} {s : String} : (p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
@[simp]
theorem Pos.Raw.byteIdx_add_string {p : Pos.Raw} {s : String} : (p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
@[simp]
theorem Pos.Raw.byteIdx_string_add {s : String} {p : Pos.Raw} : (s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
@[simp]
theorem Pos.Raw.byteIdx_add_char {p : Pos.Raw} {c : Char} : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
@[simp]
theorem Pos.Raw.byteIdx_char_add {c : Char} {p : Pos.Raw} : (c + p).byteIdx = c.utf8Size + p.byteIdx := rfl
theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.endPos ≤ p ∧ (p - s.endPos).IsValid t) := by
p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.endPos ≤ p ∧ (p - s).IsValid t) := by
obtain ⟨s, rfl⟩ := exists_eq_asString s
obtain ⟨t, rfl⟩ := exists_eq_asString t
rw [← List.asString_append, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString]
@ -738,15 +763,15 @@ theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
· refine ⟨min j s.length, ?_⟩
rw [List.take_append_of_le_length (Nat.min_le_right ..), ← List.take_eq_take_min, hj]
· refine ⟨s.length + j, ?_⟩
simp only [Pos.Raw.byteIdx_sub, byteIdx_endPos, Pos.Raw.le_iff] at hj h
simp only [Pos.Raw.byteIdx_sub_string, byteIdx_endPos, Pos.Raw.le_iff] at hj h
simp only [List.take_append, List.take_of_length_le (i := s.length + j) (l := s) (by omega),
Nat.add_sub_cancel_left, List.asString_append, utf8ByteSize_append]
omega
theorem Pos.Raw.IsValid.append_left {t : String} {p : Pos.Raw} (h : p.IsValid t) (s : String) :
(s.endPos + p).IsValid (s ++ t) :=
(s + p).IsValid (s ++ t) :=
isValid_append.2 (Or.inr ⟨by simp [Pos.Raw.le_iff], by
suffices p = s.endPos + p - s.endPos by simp [← this, h]
suffices p = s + p - s by simp [← this, h]
simp [Pos.Raw.ext_iff]⟩)
theorem Pos.Raw.IsValid.append_right {s : String} {p : Pos.Raw} (h : p.IsValid s) (t : String) :
@ -760,7 +785,7 @@ theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c :
theorem Pos.Raw.isValid_push {s : String} {c : Char} {p : Pos.Raw} :
p.IsValid (s.push c) ↔ p.IsValid s ∨ p = s.endPos + c := by
rw [← append_singleton, isValid_append, isValid_singleton]
simp only [le_iff, byteIdx_endPos, Pos.Raw.ext_iff, byteIdx_sub, byteIdx_zero, byteIdx_addChar]
simp only [le_iff, byteIdx_endPos, Pos.Raw.ext_iff, byteIdx_sub_string, byteIdx_zero, byteIdx_add_char]
refine ⟨?_, ?_⟩
· rintro (h|⟨h₁,(h₂|h₂)⟩)
· exact Or.inl h
@ -1064,17 +1089,79 @@ def toSlice (s : String) : Slice where
/-- The number of bytes of the UTF-8 encoding of the string slice. -/
@[expose]
def Slice.utf8ByteSize (s : Slice) : Pos.Raw :=
s.endExclusive.offset - s.startInclusive.offset
def Slice.utf8ByteSize (s : Slice) : Nat :=
s.startInclusive.offset.byteDistance s.endExclusive.offset
theorem Slice.utf8ByteSize_eq {s : Slice} :
s.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
instance : HAdd Pos.Raw Slice Pos.Raw where
hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
instance : HAdd Slice Pos.Raw Pos.Raw where
hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
instance : HSub Pos.Raw Slice Pos.Raw where
hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }
@[simp]
theorem Slice.byteIdx_utf8ByteSize {s : Slice} :
s.utf8ByteSize.byteIdx = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
theorem Pos.Raw.byteIdx_add_slide {p : Pos.Raw} {s : Slice} :
(p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
@[simp]
theorem Pos.Raw.byteIdx_slice_add {s : Slice} {p : Pos.Raw} :
(s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
@[simp]
theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
(p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
/-- The end position of a slice, as a `Pos.Raw`. -/
@[expose]
def Slice.rawEndPos (s : Slice) : Pos.Raw where
byteIdx := s.utf8ByteSize
@[simp]
theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSize := (rfl)
/--
Offsets `p` by `offset` on the left. This is not an `HAdd` instance because it should be a
relatively rare operation, so we use a name to make accidental use less likely. To offset a position
by the size of a character `c` or string `s`, you can use `c + p` resp. `s + p`.
This should be seen as an operation that converts relative positions into absolute positions.
See also `Pos.Raw.increaseBy`, which is an "advancing" operation.
-/
@[expose, inline]
def Pos.Raw.offsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
byteIdx := offset.byteIdx + p.byteIdx
@[simp]
theorem Pos.Raw.byteIdx_offsetBy {p : Pos.Raw} {offset : Pos.Raw} :
(p.offsetBy offset).byteIdx = offset.byteIdx + p.byteIdx := (rfl)
/--
Decreases `p` by `offset`. This is not an `HSub` instance because it should be a relatively
rare operation, so we use a name to make accidental use less likely. To unoffset a position
by the size of a character `c` or string `s`, you can use `p - c` resp. `p - s`.
This should be seen as an operation that converts absolute positions into relative positions.
The byte index is computed by truncating subtraction on `Nat`, so the result is the position `0`
whenever `offset` is not before `p`.
See also `Pos.Raw.decreaseBy`, which is an "unadvancing" operation.
-/
@[expose, inline]
def Pos.Raw.unoffsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
byteIdx := p.byteIdx - offset.byteIdx
@[simp]
theorem Pos.Raw.byteIdx_unoffsetBy {p : Pos.Raw} {offset : Pos.Raw} :
(p.unoffsetBy offset).byteIdx = p.byteIdx - offset.byteIdx := (rfl)
/-- Criterion for validity of positions in string slices. -/
structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where
le_utf8ByteSize : p ≤ s.utf8ByteSize
isValid_add : (s.startInclusive.offset + p).IsValid s.str
le_utf8ByteSize : p ≤ s.rawEndPos
isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str
/--
Accesses the indicated byte in the UTF-8 encoding of a string slice.
@ -1082,10 +1169,11 @@ Accesses the indicated byte in the UTF-8 encoding of a string slice.
At runtime, this function is implemented by efficient, constant-time code.
-/
@[inline, expose]
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
s.str.getUTF8Byte (s.startInclusive.offset + p) (by
def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
s.str.getUTF8Byte (p.offsetBy s.startInclusive.offset) (by
have := s.endExclusive.isValid.le_endPos
simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.le_iff, byteIdx_endPos, Pos.Raw.byteIdx_add] at *
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.le_iff, byteIdx_endPos,
Pos.Raw.byteIdx_offsetBy] at *
omega)
/--
@ -1094,7 +1182,7 @@ is out-of-bounds.
-/
@[expose]
def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
if h : p < s.utf8ByteSize then
if h : p < s.rawEndPos then
s.getUTF8Byte p h
else
panic! "String slice access is out of bounds."
@ -1119,10 +1207,10 @@ theorem Slice.utf8ByteSize_copy {s : Slice} :
rw [Nat.min_eq_left (by simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_endPos)]
@[simp]
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.utf8ByteSize := by
simp [Pos.Raw.ext_iff]
theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.rawEndPos := by
simp [Pos.Raw.ext_iff, utf8ByteSize_eq]
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.rawEndPos} :
s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]
@ -1134,19 +1222,16 @@ theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
(s.getUTF8Byte 0 h).IsUTF8FirstByte := by
simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero
@[simp]
theorem Pos.Raw.add_zero {p : Pos.Raw} : p + 0 = p := by
simp [Pos.Raw.ext_iff]
@[simp]
theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
p.IsValid s.copy ↔ p.IsValidForSlice s := by
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
· simpa using h₁
· have := s.startInclusive_le_endExclusive
simp_all only [Slice.endPos_copy, ValidPos.le_iff, le_iff, Slice.byteIdx_utf8ByteSize]
simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq,
ValidPos.le_iff]
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] at h₂
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff] at h₂
· rcases h₂ with (h₂|⟨-, h₂⟩)
· rw [← h₂]
exact s.startInclusive.isValid
@ -1157,9 +1242,9 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
omega
· simpa using h₁
· have := s.startInclusive_le_endExclusive
simp_all only [ValidPos.le_iff, le_iff, Slice.byteIdx_utf8ByteSize]
simp_all only [le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq, ValidPos.le_iff]
rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff]
rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff]
· exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
· simp [le_iff]
· have := s.endExclusive.isValid.le_endPos
@ -1191,19 +1276,39 @@ instance {s : Slice} : Inhabited s.Pos where
default := s.startPos
@[simp]
theorem Slice.offset_startInclusive_add_utf8ByteSize {s : Slice} :
s.startInclusive.offset + s.utf8ByteSize = s.endExclusive.offset := by
theorem Slice.offset_startInclusive_add_self {s : Slice} :
s.startInclusive.offset + s = s.endExclusive.offset := by
have := s.startInclusive_le_endExclusive
simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff]
simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff, utf8ByteSize_eq]
@[simp]
theorem Pos.Raw.offsetBy_endPos_left {p : Pos.Raw} {s : String} :
s.endPos.offsetBy p = p + s := by
simp [Pos.Raw.ext_iff]
@[simp]
theorem Pos.Raw.offsetBy_endPos_right {p : Pos.Raw} {s : String} :
p.offsetBy s.endPos = s + p := by
simp [Pos.Raw.ext_iff]
@[simp]
theorem Pos.Raw.offsetBy_sliceRawEndPos_left {p : Pos.Raw} {s : Slice} :
s.rawEndPos.offsetBy p = p + s := by
simp [Pos.Raw.ext_iff]
@[simp]
theorem Pos.Raw.offsetBy_sliceRawEndPos_right {p : Pos.Raw} {s : Slice} :
p.offsetBy s.rawEndPos = s + p := by
simp [Pos.Raw.ext_iff]
/-- The past-the-end position of `s`, as an `s.Pos`. -/
@[inline, expose]
def Slice.endPos (s : Slice) : s.Pos where
offset := s.utf8ByteSize
offset := s.rawEndPos
isValidForSlice := ⟨by simp [Pos.Raw.le_iff], by simpa using s.endExclusive.isValid⟩
@[simp]
theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.utf8ByteSize := (rfl)
theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.rawEndPos := (rfl)
instance {s : Slice} : LE s.Pos where
le l r := l.offset ≤ r.offset
@ -1224,16 +1329,16 @@ instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
decidable_of_iff' _ Slice.Pos.lt_iff
theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
p.IsValidForSlice s ↔ (p = s.rawEndPos ∨ (∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]
/-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
@[expose]
def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
if h : p < s.utf8ByteSize then
if h : p < s.rawEndPos then
(s.getUTF8Byte p h).IsUTF8FirstByte
else
p = s.utf8ByteSize
p = s.rawEndPos
@[simp]
theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
@ -1255,7 +1360,7 @@ instance {s : Slice} {p : Pos.Raw} : Decidable (p.IsValidForSlice s) :=
decidable_of_iff _ Pos.Raw.isValidForSlice_eq_true_iff
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?_copy {s : Slice} {p : Pos.Raw} :
p.IsValidForSlice s ↔ p = s.utf8ByteSize ∨ (s.copy.bytes.utf8DecodeChar? p.byteIdx).isSome := by
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (s.copy.bytes.utf8DecodeChar? p.byteIdx).isSome := by
rw [← isValid_copy_iff, isValid_iff_isSome_utf8DecodeChar?, Slice.endPos_copy]
theorem Slice.bytes_str_eq {s : Slice} :
@ -1269,7 +1374,7 @@ theorem Slice.bytes_str_eq {s : Slice} :
· simpa [Pos.Raw.le_iff] using s.startInclusive_le_endExclusive
theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.Raw} :
p.IsValidForSlice s ↔ p = s.utf8ByteSize ∨ (p < s.utf8ByteSize ∧ (s.str.bytes.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (p < s.rawEndPos ∧ (s.str.bytes.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
refine ⟨?_, ?_⟩
· rw [isValidForSlice_iff_isSome_utf8DecodeChar?_copy]
rintro (rfl|h)
@ -1315,20 +1420,20 @@ theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.
underlying string `s.str`. -/
@[inline]
def Slice.Pos.str {s : Slice} (pos : s.Pos) : s.str.ValidPos where
offset := s.startInclusive.offset + pos.offset
isValid := pos.isValidForSlice.isValid_add
offset := pos.offset.offsetBy s.startInclusive.offset
isValid := pos.isValidForSlice.isValid_offsetBy
@[simp]
theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} :
pos.str.offset = s.startInclusive.offset + pos.offset := (rfl)
pos.str.offset = pos.offset.offsetBy s.startInclusive.offset := (rfl)
@[simp]
theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
pos.str.offset ≤ s.endExclusive.offset := by
have := pos.isValidForSlice.le_utf8ByteSize
have := s.startInclusive_le_endExclusive
simp only [ValidPos.le_iff, Pos.Raw.le_iff, byteIdx_utf8ByteSize, offset_str, Pos.Raw.byteIdx_add,
ge_iff_le] at *
simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str,
Pos.Raw.byteIdx_offsetBy, ValidPos.le_iff] at *
omega
theorem Slice.Pos.offset_le_offset_str {s : Slice} {pos : s.Pos} :
@ -1414,51 +1519,54 @@ def Slice.replaceStartEnd! (s : Slice) (newStart newEnd : s.Pos) : Slice :=
@[simp]
theorem Slice.utf8ByteSize_replaceStart {s : Slice} {pos : s.Pos} :
(s.replaceStart pos).utf8ByteSize = s.utf8ByteSize - pos.offset := by
(s.replaceStart pos).utf8ByteSize = s.utf8ByteSize - pos.offset.byteIdx := by
simp only [utf8ByteSize_eq, str_replaceStart, endExclusive_replaceStart,
startInclusive_replaceStart, Pos.offset_str, Pos.Raw.byteIdx_offsetBy]
omega
theorem Slice.rawEndPos_replaceStart {s : Slice} {pos : s.Pos} :
(s.replaceStart pos).rawEndPos = s.rawEndPos.unoffsetBy pos.offset := by
ext
simp
omega
@[simp]
theorem Slice.utf8ByteSize_replaceEnd {s : Slice} {pos : s.Pos} :
(s.replaceEnd pos).utf8ByteSize = pos.offset := by
(s.replaceEnd pos).utf8ByteSize = pos.offset.byteIdx := by
simp [utf8ByteSize_eq]
@[simp]
theorem Slice.rawEndPos_replaceEnd {s : Slice} {pos : s.Pos} :
(s.replaceEnd pos).rawEndPos = pos.offset := by
ext
simp
@[simp]
theorem Slice.utf8ByteSize_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
(s.replaceStartEnd newStart newEnd h).utf8ByteSize = newEnd.offset - newStart.offset := by
ext
simp only [byteIdx_utf8ByteSize, str_replaceStartEnd, endExclusive_replaceStartEnd,
Pos.offset_str, Pos.Raw.byteIdx_add, startInclusive_replaceStartEnd, Pos.Raw.byteIdx_sub]
(s.replaceStartEnd newStart newEnd h).utf8ByteSize = newStart.offset.byteDistance newEnd.offset := by
simp [utf8ByteSize_eq, Pos.Raw.byteDistance_eq]
omega
theorem Pos.Raw.add_comm (a b : Pos.Raw) : a + b = b + a := by
theorem Pos.Raw.offsetBy_assoc {p q r : Pos.Raw} :
(p.offsetBy q).offsetBy r = p.offsetBy (q.offsetBy r) := by
ext
simpa using Nat.add_comm _ _
theorem Pos.Raw.add_assoc (a b c : Pos.Raw) : a + b + c = a + (b + c) := by
ext
simpa using Nat.add_assoc _ _ _
simp [Nat.add_assoc]
theorem Pos.Raw.isValidForSlice_replaceStart {s : Slice} {p : s.Pos} {off : Pos.Raw} :
off.IsValidForSlice (s.replaceStart p) ↔ (p.offset + off).IsValidForSlice s := by
off.IsValidForSlice (s.replaceStart p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
· have := p.isValidForSlice.le_utf8ByteSize
simp_all [le_iff]
omega
· simp only [Slice.str_replaceStart, Slice.startInclusive_replaceStart, Slice.Pos.offset_str] at h₂
rwa [← Pos.Raw.add_assoc]
· simpa [Pos.Raw.offsetBy_assoc] using h₂
· simp_all [Pos.Raw.le_iff]
omega
· simp only [Slice.str_replaceStart, Slice.startInclusive_replaceStart, Slice.Pos.offset_str]
rwa [Pos.Raw.add_assoc]
· simpa [Pos.Raw.offsetBy_assoc] using h₂
theorem Pos.Raw.isValidForSlice_replaceEnd {s : Slice} {p : s.Pos} {off : Pos.Raw} :
off.IsValidForSlice (s.replaceEnd p) ↔ off ≤ p.offset ∧ off.IsValidForSlice s := by
refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩
· simpa using h₁
· simp only [Slice.utf8ByteSize_replaceEnd] at h₁
· simp only [Slice.rawEndPos_replaceEnd] at h₁
exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_utf8ByteSize
· simpa using h₂
· simpa using h₁
@ -1539,7 +1647,7 @@ def Slice.Pos.ofSlice {s : String} (pos : s.toSlice.Pos) : s.ValidPos where
theorem Slice.Pos.ofset_ofSlice {s : String} {pos : s.toSlice.Pos} : pos.ofSlice.offset = pos.offset := (rfl)
@[simp]
theorem utf8ByteSize_toSlice {s : String} : s.toSlice.utf8ByteSize = s.endPos := by
theorem rawEndPos_toSlice {s : String} : s.toSlice.rawEndPos = s.endPos := by
rw [← Slice.endPos_copy, copy_toSlice]
@[simp]
@ -1665,8 +1773,8 @@ theorem eq_singleton_append {s : String} (h : s.startValidPos ≠ s.endValidPos)
theorem Slice.copy_eq_copy_replaceEnd {s : Slice} {pos : s.Pos} :
s.copy = (s.replaceEnd pos).copy ++ (s.replaceStart pos).copy := by
rw [← String.bytes_inj, bytes_copy, bytes_append, bytes_copy, bytes_copy]
simp only [str_replaceEnd, startInclusive_replaceEnd, endExclusive_replaceEnd,
Slice.Pos.offset_str, Pos.Raw.byteIdx_add, str_replaceStart, startInclusive_replaceStart,
simp only [str_replaceEnd, startInclusive_replaceEnd, endExclusive_replaceEnd, Pos.offset_str,
Pos.Raw.byteIdx_offsetBy, str_replaceStart, startInclusive_replaceStart,
endExclusive_replaceStart, ByteArray.extract_append_extract, Nat.le_add_right, Nat.min_eq_left]
rw [Nat.max_eq_right]
exact pos.offset_str_le_offset_endExclusive
@ -1734,27 +1842,40 @@ theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
/-- Given a position in `s.replaceStart p₀`, obtain the corresponding position in `s`. -/
@[inline]
def Slice.Pos.ofReplaceStart {s : Slice} {p₀ : s.Pos} (pos : (s.replaceStart p₀).Pos) : s.Pos where
offset := p₀.offset + pos.offset
offset := pos.offset.offsetBy p₀.offset
isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.1 pos.isValidForSlice
@[simp]
theorem Slice.Pos.offset_ofReplaceStart {s : Slice} {p₀ : s.Pos} {pos : (s.replaceStart p₀).Pos} :
(ofReplaceStart pos).offset = p₀.offset + pos.offset := (rfl)
(ofReplaceStart pos).offset = pos.offset.offsetBy p₀.offset := (rfl)
theorem Pos.Raw.offsetBy_unoffsetBy_of_le {p : Pos.Raw} {q : Pos.Raw} (h : q ≤ p) :
(p.unoffsetBy q).offsetBy q = p := by
ext
simp_all [le_iff]
@[simp]
theorem Pos.Raw.unoffsetBy_offsetBy {p q : Pos.Raw} : (p.offsetBy q).unoffsetBy q = p := by
ext
simp
/-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
`s.replaceStart p₀`. -/
@[inline]
def Slice.Pos.toReplaceStart {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : p₀.offset ≤ pos.offset) :
(s.replaceStart p₀).Pos where
offset := pos.offset - p₀.offset
offset := pos.offset.unoffsetBy p₀.offset
isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.2 (by
have : p₀.offset + (pos.offset - p₀.offset) = pos.offset := by
simp_all [Pos.Raw.le_iff, String.Pos.Raw.ext_iff]
simpa [this] using pos.isValidForSlice)
simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValidForSlice)
@[simp]
theorem Slice.Pos.offset_toReplaceStart {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h} :
(toReplaceStart p₀ pos h).offset = pos.offset - p₀.offset := (rfl)
(toReplaceStart p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
@[simp]
theorem Pos.Raw.offsetBy_zero_left {p : Pos.Raw} : (0 : Pos.Raw).offsetBy p = p := by
ext
simp
@[simp]
theorem Slice.Pos.ofReplaceStart_startPos {s : Slice} {pos : s.Pos} :
@ -1783,19 +1904,58 @@ theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.en
rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd]
theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size := by
(pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size := by
simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]
/--
Advances `p` by `n` bytes. This is not an `HAdd` instance because it should be a relatively
rare operation, so we use a name to make accidental use less likely. To add the size of a
character `c` or string `s` to a raw position `p`, you can use `p + c` resp. `p + s`.
This should be seen as an "advance" or "skip".
See also `Pos.Raw.offsetBy`, which turns relative positions into absolute positions.
-/
@[expose, inline]
def Pos.Raw.increaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
byteIdx := p.byteIdx + n
@[simp]
theorem Pos.Raw.byteIdx_increaseBy {p : Pos.Raw} {n : Nat} :
(p.increaseBy n).byteIdx = p.byteIdx + n := (rfl)
/--
Moves the position `p` back by `n` bytes. This is not an `HSub` instance because it should be a
relatively rare operation, so we use a name to make accidental use less likely. To remove the size
of a character `c` or string `s` from a raw position `p`, you can use `p - c` resp. `p - s`.
This should be seen as the inverse of an "advance" or "skip".
The byte index is computed by truncating subtraction on `Nat`, so the result is the position `0`
whenever `n` is at least `p.byteIdx`.
See also `Pos.Raw.unoffsetBy`, which turns absolute positions into relative positions.
-/
@[expose, inline]
def Pos.Raw.decreaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
byteIdx := p.byteIdx - n
@[simp]
theorem Pos.Raw.byteIdx_decreaseBy {p : Pos.Raw} {n : Nat} :
(p.decreaseBy n).byteIdx = p.byteIdx - n := (rfl)
theorem Pos.Raw.increaseBy_charUtf8Size {p : Pos.Raw} {c : Char} :
p.increaseBy c.utf8Size = p + c := by
simp [Pos.Raw.ext_iff]
/-- Advances a valid position on a slice to the next valid position, given a proof that the
position is not the past-the-end position, which guarantees that such a position exists. -/
@[expose]
def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte
offset := pos.offset.increaseBy ((pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte)
isValidForSlice := by
obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
replace ht' : pos.offset = ⟨t₁.utf8ByteSize⟩ := Eq.symm (String.Pos.Raw.ext ht')
replace ht' : pos.offset = t₁.endPos := Eq.symm (String.Pos.Raw.ext ht')
rw [utf8ByteSize_byte, ← Pos.Raw.isValid_copy_iff, ht, ht']
refine Pos.Raw.IsValid.append_right ?_ t₂
rw [Pos.Raw.increaseBy_charUtf8Size]
refine Pos.Raw.IsValid.append_left ?_ t₁
exact Pos.Raw.isValid_singleton.2 (Or.inr rfl)
@ -1841,10 +2001,10 @@ theorem Pos.Raw.byteIdx_dec {p : Pos.Raw} : p.dec.byteIdx = p.byteIdx - 1 := (rf
def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw :=
go (pos.offset.byteIdx - 1) (by
have := pos.isValidForSlice.le_utf8ByteSize
simp [Pos.Raw.le_iff, Pos.Raw.lt_iff, Pos.ext_iff] at ⊢ this h
simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h
omega)
where
go (off : Nat) (h₁ : off < s.utf8ByteSize) : String.Pos.Raw :=
go (off : Nat) (h₁ : off < s.utf8ByteSize) : String.Pos.Raw :=
if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
⟨off⟩
else
@ -1854,10 +2014,10 @@ where
simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
match off with
| 0 => False.elim (by contradiction)
| off + 1 => go off (by simp [Pos.Raw.lt_iff] at ⊢ h₁; omega)
| off + 1 => go off (by omega)
termination_by structural off
theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : off < s.utf8ByteSize) :
theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : off < s.utf8ByteSize) :
(Slice.Pos.prevAux.go off h₁).IsValidForSlice s := by
induction off with
| zero =>
@ -2003,20 +2163,20 @@ theorem ValidPos.cast_rfl {s : String} {pos : s.ValidPos} : pos.cast rfl = pos :
/-- Given a byte position within a string slice, obtains the smallest valid position that is
strictly greater than the given byte position. -/
@[inline]
def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.utf8ByteSize) : s.Pos :=
def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.rawEndPos) : s.Pos :=
go offset.inc
where
go (offset : String.Pos.Raw) : s.Pos :=
if h : offset < s.utf8ByteSize then
if h : offset < s.rawEndPos then
if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
else
go offset.inc
else
s.endPos
termination_by s.utf8ByteSize.byteIdx - offset.byteIdx
termination_by s.utf8ByteSize - offset.byteIdx
decreasing_by
simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.byteIdx_inc, gt_iff_lt] at h ⊢
simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.byteIdx_inc] at h ⊢
omega
@[simp]
@ -2028,7 +2188,7 @@ theorem Pos.Raw.le_of_lt {p q : Pos.Raw} : p < q → p ≤ q := by simpa [lt_iff
theorem Pos.Raw.inc_le {p q : Pos.Raw} : p.inc ≤ q ↔ p < q := by simpa [lt_iff, le_iff] using Nat.succ_le
private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.utf8ByteSize) :
private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.rawEndPos) :
o ≤ (findNextPos.go s o).offset := by
fun_induction findNextPos.go with
| case1 => simp
@ -2041,7 +2201,7 @@ private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (
theorem Slice.lt_offset_findNextPos {s : Slice} {o : String.Pos.Raw} (h) : o < (s.findNextPos o h).offset :=
Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (le_offset_findNextPosGo (Pos.Raw.inc_le.2 h))
theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : ⟨p⟩ < s.utf8ByteSize} :
theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : p < s.utf8ByteSize} :
prevAux.go p h ≤ ⟨p⟩ := by
induction p with
| zero =>
@ -2066,14 +2226,14 @@ theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.
simp [Pos.ext_iff, Pos.Raw.lt_iff] at *
omega
theorem Slice.Pos.prevAux_lt_utf8ByteSize {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.utf8ByteSize :=
theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize
theorem Pos.Raw.ne_of_lt {a b : Pos.Raw} : a < b → a ≠ b := by
simpa [lt_iff, Pos.Raw.ext_iff] using Nat.ne_of_lt
theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_utf8ByteSize
simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_rawEndPos
theorem Slice.Pos.offset_prev_lt_offset {s : Slice} {p : s.Pos} {h} : (p.prev h).offset < p.offset := by
simpa [prev] using prevAux_lt_self
@ -2552,12 +2712,12 @@ def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : L
let i := s.next i
let j := sep.next j
if sep.atEnd j then
splitOnAux s sep i i 0 (s.extract b (i - j)::r)
splitOnAux s sep i i 0 (s.extract b (i.unoffsetBy j)::r)
else
splitOnAux s sep b i j r
else
splitOnAux s sep b (s.next (i - j)) 0 r
termination_by (s.endPos.1 - (i - j).1, sep.endPos.1 - j.1)
splitOnAux s sep b (s.next (i.unoffsetBy j)) 0 r
termination_by (s.endPos.1 - (j.byteDistance i), sep.endPos.1 - j.1)
decreasing_by
focus
rename_i h _ _
@ -2566,7 +2726,7 @@ decreasing_by
(Nat.lt_of_le_of_lt (Nat.sub_le ..) (lt_next s _))
focus
rename_i i₀ j₀ _ eq h'
rw [show (s.next i₀ - sep.next j₀).1 = (i₀ - j₀).1 by
rw [show (sep.next j₀).byteDistance (s.next i₀) = j₀.byteDistance i₀ by
change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _
rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl]
right; exact Nat.sub_lt_sub_left
@ -3200,7 +3360,7 @@ position in the underlying string, the fallback value `(default : Char)`, which
returned. Does not panic.
-/
@[inline] def get : Substring → String.Pos.Raw → Char
| ⟨s, b, _⟩, p => s.get (b+p)
| ⟨s, b, _⟩, p => s.get (p.offsetBy b)
@[export lean_substring_get]
def Internal.getImpl : Substring → String.Pos.Raw → Char :=
@ -3215,7 +3375,7 @@ position, not the underlying string.
-/
@[inline] def next : Substring → String.Pos.Raw → String.Pos.Raw
| ⟨s, b, e⟩, p =>
let absP := b+p
let absP := p.offsetBy b
if absP = e then p else { byteIdx := (s.next absP).byteIdx - b.byteIdx }
theorem lt_next (s : Substring) (i : String.Pos.Raw) (h : i.1 < s.bsize) :
@ -3236,7 +3396,7 @@ position, not the underlying string.
-/
@[inline] def prev : Substring → String.Pos.Raw → String.Pos.Raw
| ⟨s, b, _⟩, p =>
let absP := b+p
let absP := p.offsetBy b
if absP = b then p else { byteIdx := (s.prev absP).byteIdx - b.byteIdx }
@[export lean_substring_prev]
@ -3295,7 +3455,7 @@ by advancing its start position.
If the substring's end position is reached, the start position is not advanced past it.
-/
@[inline] def drop : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, b + ss.nextn n 0, e⟩
| ss@⟨s, b, e⟩, n => ⟨s, (ss.nextn n 0).offsetBy b, e⟩
@[export lean_substring_drop]
def Internal.dropImpl : Substring → Nat → Substring :=
@ -3308,7 +3468,7 @@ by moving its end position towards its start position.
If the substring's start position is reached, the end position is not retracted past it.
-/
@[inline] def dropRight : Substring → Nat → Substring
| ss@⟨s, b, _⟩, n => ⟨s, b, b + ss.prevn n ⟨ss.bsize⟩⟩
| ss@⟨s, b, _⟩, n => ⟨s, b, (ss.prevn n ⟨ss.bsize⟩).offsetBy b⟩
/--
Retains only the specified number of characters (Unicode code points) at the beginning of a
@ -3317,7 +3477,7 @@ substring, by moving its end position towards its start position.
If the substring's start position is reached, the end position is not retracted past it.
-/
@[inline] def take : Substring → Nat → Substring
| ss@⟨s, b, _⟩, n => ⟨s, b, b + ss.nextn n 0⟩
| ss@⟨s, b, _⟩, n => ⟨s, b, (ss.nextn n 0).offsetBy b⟩
/--
Retains only the specified number of characters (Unicode code points) at the end of a substring, by
@ -3326,7 +3486,7 @@ moving its start position towards its end position.
If the substring's end position is reached, the start position is not advanced past it.
-/
@[inline] def takeRight : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, b + ss.prevn n ⟨ss.bsize⟩, e⟩
| ss@⟨s, b, e⟩, n => ⟨s, (ss.prevn n ⟨ss.bsize⟩).offsetBy b, e⟩
/--
Checks whether a position in a substring is precisely equal to its ending position.
@ -3335,7 +3495,7 @@ The position is understood relative to the substring's starting position, rather
string's starting position.
-/
@[inline] def atEnd : Substring → String.Pos.Raw → Bool
| ⟨_, b, e⟩, p => b + p == e
| ⟨_, b, e⟩, p => p.offsetBy b == e
/--
Returns the region of the substring delimited by the provided start and stop positions, as a
@ -3347,7 +3507,7 @@ If the resulting substring is empty, then the resulting substring is a substring
positions adjusted.
-/
@[inline] def extract : Substring → String.Pos.Raw → String.Pos.Raw → Substring
| ⟨s, b, e⟩, b', e' => if b' ≥ e' then ⟨"", 0, 0⟩ else ⟨s, e.min (b+b'), e.min (b+e')⟩
| ⟨s, b, e⟩, b', e' => if b' ≥ e' then ⟨"", 0, 0⟩ else ⟨s, e.min (b'.offsetBy b), e.min (e'.offsetBy b)⟩
@[export lean_substring_extract]
def Internal.extractImpl : Substring → String.Pos.Raw → String.Pos.Raw → Substring :=
@ -3372,14 +3532,14 @@ def splitOn (s : Substring) (sep : String := " ") : List Substring :=
let i := s.next i
let j := sep.next j
if sep.atEnd j then
loop i i 0 (s.extract b (i-j) :: r)
loop i i 0 (s.extract b (i.unoffsetBy j) :: r)
else
loop b i j r
else
loop b (s.next i) 0 r
else
let r := if sep.atEnd j then
"".toSubstring :: s.extract b (i-j) :: r
"".toSubstring :: s.extract b (i.unoffsetBy j) :: r
else
s.extract b i :: r
r.reverse
@ -4058,13 +4218,15 @@ theorem byteIdx_mk (n : Nat) : byteIdx ⟨n⟩ = n := rfl
@[simp] theorem mk_byteIdx (p : Pos.Raw) : ⟨p.byteIdx⟩ = p := rfl
@[simp] theorem add_byteIdx (p₁ p₂ : Pos.Raw) : (p₁ + p₂).byteIdx = p₁.byteIdx + p₂.byteIdx := rfl
@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
theorem add_byteIdx {p₁ p₂ : Pos.Raw} : (p₂.offsetBy p₁).byteIdx = p₁.byteIdx + p₂.byteIdx := by
simp
theorem add_eq (p₁ p₂ : Pos.Raw) : p₁ + p₂ = ⟨p₁.byteIdx + p₂.byteIdx⟩ := rfl
@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
theorem add_eq {p₁ p₂ : Pos.Raw} : p₂.offsetBy p₁ = ⟨p₁.byteIdx + p₂.byteIdx⟩ := rfl
@[simp] theorem sub_byteIdx (p₁ p₂ : Pos.Raw) : (p₁ - p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
theorem sub_eq (p₁ p₂ : Pos.Raw) : p₁ - p₂ = ⟨p₁.byteIdx - p₂.byteIdx⟩ := rfl
@[deprecated byteIdx_unoffsetBy (since := "2025-10-08")]
theorem sub_byteIdx (p₁ p₂ : Pos.Raw) : (p₁.unoffsetBy p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
@[simp] theorem addChar_byteIdx (p : Pos.Raw) (c : Char) : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
@ -4136,7 +4298,7 @@ open String
namespace Substring
@[simp] theorem prev_zero (s : Substring) : s.prev 0 = 0 := by simp [prev, Pos.Raw.add_eq, Pos.Raw.byteIdx_zero]
@[simp] theorem prev_zero (s : Substring) : s.prev 0 = 0 := by simp [prev]
@[simp] theorem prevn_zero (s : Substring) : ∀ n, s.prevn n 0 = 0
| 0 => rfl

View file

@ -1424,22 +1424,22 @@ public theorem isUTF8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
simp
@[expose]
public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : String.Pos.Raw :=
public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : Nat :=
if c &&& 0x80 = 0 then
⟨1⟩
1
else if c &&& 0xe0 = 0xc0 then
⟨2⟩
2
else if c &&& 0xf0 = 0xe0 then
⟨3⟩
3
else
⟨4⟩
4
def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → String.Pos.Raw
| .invalid => ⟨0⟩
| .done => ⟨1⟩
| .oneMore => ⟨2⟩
| .twoMore => ⟨3⟩
| .threeMore => ⟨4⟩
def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → Nat
| .invalid => 0
| .done => 1
| .oneMore => 2
| .twoMore => 3
| .threeMore => 4
theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUTF8FirstByte} :
c.utf8ByteSize h = (parseFirstByte c).utf8ByteSize := by
@ -1477,9 +1477,9 @@ public theorem ByteArray.isUTF8FirstByte_of_validateUTF8At {b : ByteArray} {i :
simp only [validateUTF8At_eq_isSome_utf8DecodeChar?]
exact isUTF8FirstByte_of_isSome_utf8DecodeChar?
theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
(((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
theorem Char.utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar = c.utf8Size := by
rw [UInt8.utf8ByteSize_eq_utf8ByteSize_parseFirstByte]
obtain (hc|hc|hc|hc) := c.utf8Size_eq
· rw [parseFirstByte_utf8EncodeChar_eq_done hc, FirstByte.utf8ByteSize, hc]
@ -1489,7 +1489,7 @@ theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
public theorem ByteArray.utf8Size_utf8DecodeChar {b : ByteArray} {i} {h} :
(utf8DecodeChar b i h).utf8Size =
((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
rw [← Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar]
(b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h) := by
rw [← Char.utf8ByteSize_getElem_utf8EncodeChar]
simp only [List.getElem_eq_getElem_toByteArray, utf8EncodeChar_utf8DecodeChar]
simp [ByteArray.getElem_extract]

View file

@ -76,8 +76,8 @@ namespace Internal
@[extern "lean_slice_memcmp"]
def memcmp (lhs rhs : @& Slice) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
(len : @& String.Pos.Raw) (h1 : lstart + len ≤ lhs.utf8ByteSize)
(h2 : rstart + len ≤ rhs.utf8ByteSize) : Bool :=
(len : @& String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
go 0
where
go (curr : String.Pos.Raw) : Bool :=
@ -88,7 +88,7 @@ where
have hr := by
simp [Pos.Raw.le_iff] at h h2 ⊢
omega
if lhs.getUTF8Byte (lstart + curr) hl == rhs.getUTF8Byte (rstart + curr) hr then
if lhs.getUTF8Byte (curr.offsetBy lstart) hl == rhs.getUTF8Byte (curr.offsetBy rstart) hr then
go curr.inc
else
false

View file

@ -61,7 +61,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardCharSearcher s) Id (Search
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s) Id where
rel := InvImage WellFoundedRelation.rel
(fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
(fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf

View file

@ -63,7 +63,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardCharPredSearcher s) Id (Se
def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearcher s) Id where
rel := InvImage WellFoundedRelation.rel
(fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
(fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf

View file

@ -33,12 +33,12 @@ partial def buildTable (pat : Slice) : Array String.Pos.Raw :=
if pat.utf8ByteSize == 0 then
#[]
else
let arr := Array.emptyWithCapacity pat.utf8ByteSize.byteIdx
let arr := Array.emptyWithCapacity pat.utf8ByteSize
let arr := arr.push 0
go ⟨1⟩ arr
where
go (pos : String.Pos.Raw) (table : Array String.Pos.Raw) :=
if h : pos < pat.utf8ByteSize then
if h : pos < pat.rawEndPos then
let patByte := pat.getUTF8Byte pos h
let distance := computeDistance table[table.size - 1]! patByte table
let distance := if patByte = pat.getUTF8Byte! distance then distance.inc else distance
@ -77,7 +77,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
| .proper needle table stackPos needlePos =>
(∃ newStackPos newNeedlePos,
stackPos < newStackPos ∧
newStackPos ≤ s.utf8ByteSize ∧
newStackPos ≤ s.rawEndPos ∧
it'.internalState = .proper needle table newStackPos newNeedlePos) ∨
it'.internalState = .atEnd
| .atEnd => False
@ -94,7 +94,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
| .proper needle table stackPos needlePos =>
let rec findNext (startPos : String.Pos.Raw)
(currStackPos : String.Pos.Raw) (needlePos : String.Pos.Raw) (h : stackPos ≤ currStackPos) :=
if h1 : currStackPos < s.utf8ByteSize then
if h1 : currStackPos < s.rawEndPos then
let stackByte := s.getUTF8Byte currStackPos h1
let needlePos := backtrackIfNecessary needle table stackByte needlePos
let patByte := needle.getUTF8Byte! needlePos
@ -115,7 +115,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
⟨.yield ⟨.proper needle table nextStackPos needlePos⟩ res, hiter⟩
else
let needlePos := needlePos.inc
if needlePos == needle.utf8ByteSize then
if needlePos == needle.rawEndPos then
let nextStackPos := currStackPos.inc
let res := .matched (s.pos! startPos) (s.pos! nextStackPos)
have hiter := by
@ -135,12 +135,12 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
omega
findNext startPos currStackPos.inc needlePos hinv
else
if startPos != s.utf8ByteSize then
if startPos != s.rawEndPos then
let res := .rejected (s.pos! startPos) (s.pos! currStackPos)
⟨.yield ⟨.atEnd⟩ res, by simp⟩
else
⟨.done, by simp⟩
termination_by s.utf8ByteSize.byteIdx - currStackPos.byteIdx
termination_by s.utf8ByteSize - currStackPos.byteIdx
decreasing_by
simp at h1 ⊢
omega
@ -149,8 +149,8 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
| .atEnd => pure ⟨.done, by simp⟩
private def toPair : ForwardSliceSearcher s → (Nat × Nat)
| .empty pos => (1, s.utf8ByteSize.byteIdx - pos.offset.byteIdx)
| .proper _ _ sp _ => (1, s.utf8ByteSize.byteIdx - sp.byteIdx)
| .empty pos => (1, s.utf8ByteSize - pos.offset.byteIdx)
| .proper _ _ sp _ => (1, s.utf8ByteSize - sp.byteIdx)
| .atEnd => (0, 0)
private instance : WellFoundedRelation (ForwardSliceSearcher s) where
@ -213,14 +213,14 @@ def startsWith (s : Slice) (pat : Slice) : Bool :=
omega
have hp := by
simp [Pos.Raw.le_iff]
Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.utf8ByteSize hs hp
Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
else
false
@[inline]
def dropPrefix? (s : Slice) (pat : Slice) : Option Slice :=
if startsWith s pat then
some <| s.replaceStart <| s.pos! <| s.startPos.offset + pat.utf8ByteSize
some <| s.replaceStart <| s.pos! <| pat.rawEndPos.offsetBy s.startPos.offset
else
none
@ -242,21 +242,21 @@ namespace BackwardSliceSearcher
@[inline]
def endsWith (s : Slice) (pat : Slice) : Bool :=
if h : pat.utf8ByteSize ≤ s.utf8ByteSize then
let sStart := s.endPos.offset - pat.utf8ByteSize
let sStart := s.endPos.offset.unoffsetBy pat.rawEndPos
let patStart := pat.startPos.offset
have hs := by
simp [sStart, Pos.Raw.le_iff] at h ⊢
omega
have hp := by
simp [patStart, Pos.Raw.le_iff] at h ⊢
Internal.memcmp s pat sStart patStart pat.utf8ByteSize hs hp
Internal.memcmp s pat sStart patStart pat.rawEndPos hs hp
else
false
@[inline]
def dropSuffix? (s : Slice) (pat : Slice) : Option Slice :=
if endsWith s pat then
some <| s.replaceEnd <| s.pos! <| s.endPos.offset - pat.utf8ByteSize
some <| s.replaceEnd <| s.pos! <| s.endPos.offset.unoffsetBy pat.rawEndPos
else
none

View file

@ -61,7 +61,7 @@ def beq (s1 s2 : Slice) : Bool :=
if h : s1.utf8ByteSize = s2.utf8ByteSize then
have h1 := by simp [h, String.Pos.Raw.le_iff]
have h2 := by simp [h, String.Pos.Raw.le_iff]
Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.utf8ByteSize h1 h2
Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
else
false
@ -687,7 +687,7 @@ def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool :=
s1.utf8ByteSize == s2.utf8ByteSize && go s1 s1.startPos.offset s2 s2.startPos.offset
where
go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool :=
if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then
if h : s1Curr < s1.rawEndPos ∧ s2Curr < s2.rawEndPos then
let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower
let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower
if c1 == c2 then
@ -695,7 +695,7 @@ where
else
false
else
s1Curr == s1.utf8ByteSize && s2Curr == s2.utf8ByteSize
s1Curr == s1.rawEndPos && s2Curr == s2.rawEndPos
termination_by s1.endPos.offset.byteIdx - s1Curr.byteIdx
decreasing_by
simp at h ⊢
@ -740,7 +740,7 @@ instance [Pure m] :
private def finitenessRelation [Pure m] :
Std.Iterators.FinitenessRelation (PosIterator s) m where
rel := InvImage WellFoundedRelation.rel
(fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
(fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf
@ -897,14 +897,14 @@ namespace ByteIterator
instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
IsPlausibleStep it
| .yield it' out =>
∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize,
∃ h1 : it.internalState.offset < it.internalState.s.rawEndPos,
it.internalState.s = it'.internalState.s ∧
it'.internalState.offset = it.internalState.offset.inc ∧
it.internalState.s.getUTF8Byte it.internalState.offset h1 = out
| .skip _ => False
| .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize
| .done => ¬ it.internalState.offset < it.internalState.s.rawEndPos
step := fun ⟨s, offset⟩ =>
if h : offset < s.utf8ByteSize then
if h : offset < s.rawEndPos then
pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset h), by simp [h]⟩
else
pure ⟨.done, by simp [h]⟩
@ -912,7 +912,7 @@ instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
private def finitenessRelation [Pure m] :
Std.Iterators.FinitenessRelation (ByteIterator) m where
rel := InvImage WellFoundedRelation.rel
(fun it => it.internalState.s.utf8ByteSize.byteIdx - it.internalState.offset.byteIdx)
(fun it => it.internalState.s.utf8ByteSize - it.internalState.offset.byteIdx)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf
@ -951,7 +951,7 @@ end ByteIterator
structure RevByteIterator where
s : Slice
offset : String.Pos.Raw
hinv : offset ≤ s.utf8ByteSize
hinv : offset ≤ s.rawEndPos
set_option doc.verso false
/--
@ -977,7 +977,7 @@ namespace RevByteIterator
instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
IsPlausibleStep it
| .yield it' out =>
∃ h1 : it.internalState.offset.dec < it.internalState.s.utf8ByteSize,
∃ h1 : it.internalState.offset.dec < it.internalState.s.rawEndPos,
it.internalState.s = it'.internalState.s ∧
it.internalState.offset ≠ 0 ∧
it'.internalState.offset = it.internalState.offset.dec ∧

View file

@ -95,7 +95,7 @@ partial def toPosition (fmap : FileMap) (pos : String.Pos.Raw) : Position :=
-- Some systems like the delaborator use synthetic positions without an input file,
-- which would violate `toPositionAux`'s invariant.
-- Can also happen with EOF errors, which are not strictly inside the file.
⟨fmap.getLastLine, (pos - ps.back!).byteIdx⟩
⟨fmap.getLastLine, ps.back!.byteDistance pos⟩
/-- Convert a `Lean.Position` to a `String.Pos.Raw`. -/
def ofPosition (text : FileMap) (pos : Position) : String.Pos.Raw :=

View file

@ -45,7 +45,7 @@ def validateDocComment
for (⟨start, stop⟩, err) in errs do
-- Report errors at their actual location if possible
if let some pos := pos? then
let urlStx : Syntax := .atom (.synthetic (pos + start) (pos + stop)) (str.extract start stop)
let urlStx : Syntax := .atom (.synthetic (start.offsetBy pos) (stop.offsetBy pos)) (str.extract start stop)
logErrorAt urlStx err
else
logError err

View file

@ -215,11 +215,11 @@ def getModuleDoc? (env : Environment) (moduleName : Name) : Option (Array Module
def getDocStringText [Monad m] [MonadError m] (stx : TSyntax `Lean.Parser.Command.docComment) : m String :=
match stx.raw[1] with
| Syntax.atom _ val =>
return val.extract 0 (val.endPos - ⟨2⟩)
return val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
| Syntax.node _ `Lean.Parser.Command.versoCommentBody _ =>
match stx.raw[1][0] with
| Syntax.atom _ val =>
return val.extract 0 (val.endPos - ⟨2⟩)
return val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
| _ =>
throwErrorAt stx "unexpected doc string{indentD stx}"
| _ =>

View file

@ -689,11 +689,11 @@ mutual
let info : SourceInfo :=
match info with
| .none => .none
| .synthetic start stop c => .synthetic (start + ⟨1⟩) (stop - ⟨1⟩) c
| .synthetic start stop c => .synthetic (start.offsetBy ⟨1⟩) (stop.unoffsetBy ⟨1⟩) c
| .original leading start trailing stop =>
.original
{leading with stopPos := leading.stopPos + ⟨1⟩} (start + ⟨1⟩)
{trailing with startPos := trailing.startPos - ⟨1⟩} (stop - ⟨1⟩)
{leading with stopPos := leading.stopPos.offsetBy ⟨1⟩} (start.offsetBy ⟨1⟩)
{trailing with startPos := trailing.startPos.unoffsetBy ⟨1⟩} (stop.unoffsetBy ⟨1⟩)
return s.popSyntax.pushSyntax (.atom info str)
return s

View file

@ -29,7 +29,7 @@ namespace Lean.Elab.Command
match stx[1] with
| Syntax.atom _ val =>
if getVersoModuleDocs (← getEnv) |>.isEmpty then
let doc := val.extract 0 (val.endPos - ⟨2⟩)
let doc := val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
modifyEnv fun env => addMainModuleDoc env ⟨doc, range⟩
else
throwError m!"Can't add Markdown-format module docs because there is already Verso-format content present."

View file

@ -162,7 +162,7 @@ def expandOptDocComment? [Monad m] [MonadError m] (optDocComment : Syntax) : m (
match optDocComment.getOptional? with
| none => return none
| some s => match s[1] with
| .atom _ val => return some (val.extract 0 (val.endPos - ⟨2⟩))
| .atom _ val => return some (val.extract 0 (val.endPos.unoffsetBy ⟨2⟩))
| _ => throwErrorAt s "unexpected doc string{indentD s[1]}"
section Methods

View file

@ -188,7 +188,7 @@ column `range` starts at in that line. -/
def getIndentAndColumn (map : FileMap) (range : String.Range) : Nat × Nat :=
let start := map.source.findLineStart range.start
let body := map.source.findAux (· ≠ ' ') range.start start
((body - start).1, (range.start - start).1)
(start.byteDistance body, start.byteDistance range.start)
/--
An option allowing the user to customize the ideal input width. Defaults to 100.

View file

@ -937,7 +937,7 @@ private def isToken (idStartPos idStopPos : String.Pos.Raw) (tk : Option Token)
| some tk =>
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
tk.endPos ≥ idStopPos - idStartPos
tk.utf8ByteSize ≥ idStartPos.byteDistance idStopPos
def mkTokenAndFixPos (startPos : String.Pos.Raw) (tk : Option Token) : ParserFn := fun c s =>

View file

@ -75,7 +75,7 @@ where
let tailPos := info.tailPos?.get!
let hoverInfo :=
if hoverPos < tailPos then
HoverInfo.inside (hoverPos - headPos).byteIdx
HoverInfo.inside (headPos.byteDistance hoverPos)
else
HoverInfo.after
let ⟨headPosLine, _⟩ := fileMap.toPosition headPos

View file

@ -100,7 +100,7 @@ private def findSyntheticIdentifierCompletion?
let tailPos := stx.getTailPos?.get!
let hoverInfo :=
if hoverPos < tailPos then
HoverInfo.inside (tailPos - hoverPos).byteIdx
HoverInfo.inside (hoverPos.byteDistance tailPos)
else
HoverInfo.after
some { hoverInfo, ctx, info := .id stx id danglingDot info.lctx none }
@ -110,7 +110,7 @@ private partial def isCursorOnWhitespace (fileMap : FileMap) (hoverPos : String.
private partial def isCursorInProperWhitespace (fileMap : FileMap) (hoverPos : String.Pos.Raw) : Bool :=
(fileMap.source.atEnd hoverPos || (fileMap.source.get hoverPos).isWhitespace)
&& (fileMap.source.get (hoverPos - ⟨1⟩)).isWhitespace
&& (fileMap.source.get (hoverPos.unoffsetBy ⟨1⟩)).isWhitespace
private partial def isSyntheticTacticCompletion
(fileMap : FileMap)

View file

@ -105,7 +105,7 @@ private def isPositionInLineComment (text : FileMap) (pos : String.Pos.Raw) : Bo
let line := text.source.extract lineStartPos lineEndPos
let some lineCommentPos := lineCommentPosition? line
| return false
return pos >= lineStartPos + lineCommentPos
return pos >= lineCommentPos.offsetBy lineStartPos
open CandidateKind in
def findSignatureHelp? (text : FileMap) (ctx? : Option Lsp.SignatureHelpContext) (cmdStx : Syntax)

View file

@ -192,7 +192,7 @@ private def contains (query text : String) : Bool :=
! (kmpSearch query text).isEmpty
private def matchEndPos (query : String) (startPos : String.Pos.Raw) : String.Pos.Raw :=
startPos + ⟨query.utf8ByteSize⟩
startPos + query
@[specialize]
private def hightlightStringMatches? (query text : String) (matchPositions : Array String.Pos.Raw)
@ -208,13 +208,13 @@ private def hightlightStringMatches? (query text : String) (matchPositions : Arr
break
let i := mapIdx i
let globalMatchPos := matchPositions[i]!
let matchPos := globalMatchPos - offset
let matchPos := globalMatchPos.unoffsetBy offset
if matchPos >= text.endPos then
break
if let some nonMatch := nonMatch? p matchPos then
r := r.push nonMatch
let globalMatchEndPos := matchEndPos query globalMatchPos
let matchEndPos := globalMatchEndPos - offset
let matchEndPos := globalMatchEndPos.unoffsetBy offset
let «match» := text.extract matchPos matchEndPos
r := r.push <| .tag highlight (.text «match»)
p := matchEndPos
@ -255,7 +255,7 @@ private def advanceTaggedTextHighlightState (text : String) (highlighted : α) :
where
updateState (text : String) (isHighlighted : Bool) : StateM TaggedTextHighlightState Unit :=
modify fun s =>
let p : String.Pos.Raw := s.p + ⟨text.utf8ByteSize⟩
let p : String.Pos.Raw := s.p.increaseBy text.utf8ByteSize
let ms := updateMatches s.query s.ms p
let anyHighlight := s.anyHighlight || isHighlighted
{ s with p, ms, anyHighlight }

View file

@ -212,7 +212,7 @@ def Info.contains (i : Info) (pos : String.Pos.Raw) (includeStop := false) : Boo
def Info.size? (i : Info) : Option String.Pos.Raw := do
let pos ← i.pos?
let tailPos ← i.tailPos?
return tailPos - pos
return tailPos.unoffsetBy pos
-- `Info`s without position information are considered to have "infinite" size
def Info.isSmaller (i₁ i₂ : Info) : Bool :=
@ -225,7 +225,7 @@ def Info.occursInside? (i : Info) (hoverPos : String.Pos.Raw) : Option String.Po
let headPos ← i.pos?
let tailPos ← i.tailPos?
guard (headPos ≤ hoverPos && hoverPos < tailPos)
return hoverPos - headPos
return hoverPos.unoffsetBy headPos
def Info.occursInOrOnBoundary (i : Info) (hoverPos : String.Pos.Raw) : Bool := Id.run do
let some headPos := i.pos?
@ -238,7 +238,7 @@ def InfoTree.smallestInfo? (p : Info → Bool) (t : InfoTree) : Option (ContextI
let ts := t.deepestNodes fun ctx i _ => if p i then some (ctx, i) else none
let infos := ts.filterMap fun (ci, i) => do
let diff := (← i.tailPos?) - (← i.pos?)
let diff := (← i.pos?).byteDistance (← i.tailPos?)
return (diff, ci, i)
infos.toArray.getMax? (fun a b => a.1 > b.1) |>.map fun (_, ci, i) => (ci, i)
@ -306,7 +306,7 @@ partial def InfoTree.hoverableInfoAtM? [Monad m] (t : InfoTree) (hoverPos : Stri
return none
let priority : HoverableInfoPrio := {
isHoverPosOnStop := r.stop == hoverPos
size := (r.stop - r.start).byteIdx
size := r.start.byteDistance r.stop
isVariableInfo := info matches .ofTermInfo { expr := .fvar .., .. }
isPartialTermInfo := info matches .ofPartialTermInfo ..
}

View file

@ -247,7 +247,7 @@ private def updateInfo : SourceInfo → String.Pos.Raw → String.Pos.Raw → So
| info, _, _ => info
private def chooseNiceTrailStop (trail : Substring) : String.Pos.Raw :=
trail.startPos + trail.posOf '\n'
(trail.posOf '\n').offsetBy trail.startPos
/-- Remark: the State `String.Pos` is the `SourceInfo.trailing.stopPos` of the previous token,
or the beginning of the String. -/
@ -318,10 +318,10 @@ def identComponents (stx : Syntax) (nFields? : Option Nat := none) : List Syntax
rawComps
if nameComps.length == rawComps.length then
return nameComps.zip rawComps |>.map fun (id, ss) =>
let off := ss.startPos - rawStr.startPos
let off := ss.startPos.unoffsetBy rawStr.startPos
let lead := if off == 0 then lead else "".toSubstring
let trail := if ss.stopPos == rawStr.stopPos then trail else "".toSubstring
let info := original lead (pos + off) trail (pos + off + ⟨ss.bsize⟩)
let info := original lead (pos.offsetBy off) trail (pos.offsetBy off |>.offsetBy ⟨ss.bsize⟩)
ident info ss id []
-- if re-parsing failed, just give them all the same span
nameComps.map fun n => ident si n.toString.toSubstring n []

View file

@ -1,4 +1,4 @@
def showChars : Nat → String → String.Pos → IO Unit
def showChars : Nat → String → String.Pos.Raw → IO Unit
| 0, _, _ => pure ()
| n+1, s, idx => do
unless s.atEnd idx do
@ -7,14 +7,14 @@ def showChars : Nat → String → String.Pos → IO Unit
def main : IO UInt32 :=
let s₁ := "hello α_world_β";
let b : String.Pos := 0;
let b : String.Pos.Raw := 0;
let e := s₁.endPos;
IO.println (s₁.extract b e) *>
IO.println (s₁.extract (b+ " ") e) *>
IO.println (s₁.extract (b+ " ") (e-⟨1⟩)) *>
IO.println (s₁.extract (b+⟨2⟩) (e-⟨2⟩)) *>
IO.println (s₁.extract (b+⟨7⟩) e) *>
IO.println (s₁.extract (b+⟨8⟩) e) *>
IO.println (s₁.extract (b+ " ") (e.unoffsetBy ⟨1⟩)) *>
IO.println (s₁.extract (b.offsetBy ⟨2⟩) (e.unoffsetBy ⟨2⟩)) *>
IO.println (s₁.extract (b.offsetBy ⟨7⟩) e) *>
IO.println (s₁.extract (b.offsetBy ⟨8⟩) e) *>
IO.println (toString e) *>
IO.println (repr " aaa ".trim) *>
showChars s₁.length s₁ 0 *>

View file

@ -60,7 +60,7 @@ end
open Lean Elab Command in
@[command_elab commandComment] def elabCommandComment : CommandElab := fun stx => do
let .atom _ val := stx[1] | return ()
let str := val.extract 0 (val.endPos - ⟨3⟩)
let str := val.extract 0 (val.endPos.unoffsetBy ⟨3⟩)
IO.println s!"str := {repr str}"
//- My command comment hello world -//