refactor: discipline around arithmetic of String.Pos.Raw (#10713)

This PR enforces rules around arithmetic of `String.Pos.Raw`.

Specifically, it adopts the following conventions:

- Byte indices ("ordinals") in strings should be represented using `String.Pos.Raw`.
- Amounts of bytes ("cardinals") in strings should be represented using `Nat`.

For example, `String.Slice.utf8ByteSize` now returns `Nat` instead of `String.Pos.Raw`, and there is a new function `String.Slice.rawEndPos`.

Finally, the `HAdd` and `HSub` instances for `String.Pos.Raw` are reorganized. This is a **breaking change**. The `HAdd/HSub String.Pos.Raw String.Pos.Raw String.Pos.Raw` instances have been removed.

- For the use case of tracking positions relative to some other position, we instead provide `offsetBy` and `unoffsetBy` functions.
- For the use case of advancing/unadvancing a position by an arbitrary number of bytes, we instead provide `increaseBy` and `decreaseBy` functions.
- For offsetting/unoffsetting/advancing/unadvancing a position `p` by the size of a string `s` (resp. character `c`), use `s + p`/`p - s`/`p + s`/`p - s` (resp. `c + p`/`p - c`/`p + c`/`p - c`).
2025-10-09 09:47:45 +02:00 · 2025-10-09 09:47:45 +02:00 · dca8d6d188
commit dca8d6d188
parent 6f1e932542
23 changed files with 356 additions and 194 deletions
--- a/src/Init/Data/String/Basic.lean
+++ b/src/Init/Data/String/Basic.lean
@ -110,7 +110,7 @@ where
      else
        match h : validateUTF8At b i with
        | false => false
-        | true => go fuel (i + (b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)).byteIdx)
+        | true => go fuel (i + b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h))
            ?_ ?_
  termination_by structural fuel
 finally
@ -475,19 +475,25 @@ end

 namespace String

-instance : HAdd String.Pos.Raw String.Pos.Raw String.Pos.Raw where
-  hAdd p₁ p₂ := { byteIdx := p₁.byteIdx + p₂.byteIdx }
+instance : HSub String.Pos.Raw String String.Pos.Raw where
+  hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }

-instance : HSub String.Pos.Raw String.Pos.Raw String.Pos.Raw where
-  hSub p₁ p₂ := { byteIdx :=  p₁.byteIdx - p₂.byteIdx }
+instance : HSub String.Pos.Raw Char String.Pos.Raw where
+  hSub p c := { byteIdx := p.byteIdx - c.utf8Size }

@[export lean_string_pos_sub]
 def Pos.Internal.subImpl : String.Pos.Raw → String.Pos.Raw → String.Pos.Raw :=
-  (· - ·)
+  fun p₁ p₂ => ⟨p₁.byteIdx - p₂.byteIdx⟩

 instance : HAdd String.Pos.Raw Char String.Pos.Raw where
  hAdd p c := { byteIdx := p.byteIdx + c.utf8Size }

+instance : HAdd Char String.Pos.Raw String.Pos.Raw where
+  hAdd c p := { byteIdx := c.utf8Size + p.byteIdx }
+
+instance : HAdd String String.Pos.Raw String.Pos.Raw where
+  hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
+
 instance : HAdd String.Pos.Raw String String.Pos.Raw where
  hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }

@ -713,17 +719,36 @@ theorem Pos.Raw.isValid_singleton {c : Char} {p : Pos.Raw} :
    · exact ⟨0, by simp⟩
    · exact ⟨1, by simp [hi, ← singleton_eq_asString]⟩

-@[simp]
-theorem Pos.Raw.byteIdx_sub {p₁ p₂ : Pos.Raw} : (p₁ - p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
+/--
+Returns the size of the byte slice delineated by the positions `lo` and `hi` (`0` if `hi < lo`).
+-/
+@[expose, inline]
+def Pos.Raw.byteDistance (lo hi : Pos.Raw) : Nat :=
+  hi.byteIdx - lo.byteIdx
+
+theorem Pos.Raw.byteDistance_eq {lo hi : Pos.Raw} : lo.byteDistance hi = hi.byteIdx - lo.byteIdx :=
+  (rfl)

@[simp]
-theorem Pos.Raw.byteIdx_add {p₁ p₂ : Pos.Raw} : (p₁ + p₂).byteIdx = p₁.byteIdx + p₂.byteIdx := rfl
+theorem Pos.Raw.byteIdx_sub_char {p : Pos.Raw} {c : Char} : (p - c).byteIdx = p.byteIdx - c.utf8Size := rfl

@[simp]
-theorem Pos.Raw.byteIdx_addChar {p : Pos.Raw} {c : Char} : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
+theorem Pos.Raw.byteIdx_sub_string {p : Pos.Raw} {s : String} : (p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_add_string {p : Pos.Raw} {s : String} : (p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_string_add {s : String} {p : Pos.Raw} : (s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_add_char {p : Pos.Raw} {c : Char} : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_char_add {c : Char} {p : Pos.Raw} : (c + p).byteIdx = c.utf8Size + p.byteIdx := rfl

 theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
-    p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.endPos ≤ p ∧ (p - s.endPos).IsValid t) := by
+    p.IsValid (s ++ t) ↔ p.IsValid s ∨ (s.endPos ≤ p ∧ (p - s).IsValid t) := by
  obtain ⟨s, rfl⟩ := exists_eq_asString s
  obtain ⟨t, rfl⟩ := exists_eq_asString t
  rw [← List.asString_append, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString, Pos.Raw.isValid_asString]
@ -738,15 +763,15 @@ theorem Pos.Raw.isValid_append {s t : String} {p : Pos.Raw} :
    · refine ⟨min j s.length, ?_⟩
      rw [List.take_append_of_le_length (Nat.min_le_right ..), ← List.take_eq_take_min, hj]
    · refine ⟨s.length + j, ?_⟩
-      simp only [Pos.Raw.byteIdx_sub, byteIdx_endPos, Pos.Raw.le_iff] at hj h
+      simp only [Pos.Raw.byteIdx_sub_string, byteIdx_endPos, Pos.Raw.le_iff] at hj h
      simp only [List.take_append, List.take_of_length_le (i := s.length + j) (l := s) (by omega),
        Nat.add_sub_cancel_left, List.asString_append, utf8ByteSize_append]
      omega

 theorem Pos.Raw.IsValid.append_left {t : String} {p : Pos.Raw} (h : p.IsValid t) (s : String) :
-    (s.endPos + p).IsValid (s ++ t) :=
+    (s + p).IsValid (s ++ t) :=
  isValid_append.2 (Or.inr ⟨by simp [Pos.Raw.le_iff], by
-    suffices p = s.endPos + p - s.endPos by simp [← this, h]
+    suffices p = s + p - s by simp [← this, h]
    simp [Pos.Raw.ext_iff]⟩)

 theorem Pos.Raw.IsValid.append_right {s : String} {p : Pos.Raw} (h : p.IsValid s) (t : String) :
@ -760,7 +785,7 @@ theorem append_singleton {s : String} {c : Char} : s ++ singleton c = s.push c :
 theorem Pos.Raw.isValid_push {s : String} {c : Char} {p : Pos.Raw} :
    p.IsValid (s.push c) ↔ p.IsValid s ∨ p = s.endPos + c := by
  rw [← append_singleton, isValid_append, isValid_singleton]
-  simp only [le_iff, byteIdx_endPos, Pos.Raw.ext_iff, byteIdx_sub, byteIdx_zero, byteIdx_addChar]
+  simp only [le_iff, byteIdx_endPos, Pos.Raw.ext_iff, byteIdx_sub_string, byteIdx_zero, byteIdx_add_char]
  refine ⟨?_, ?_⟩
  · rintro (h|⟨h₁,(h₂|h₂)⟩)
    · exact Or.inl h
@ -1064,17 +1089,79 @@ def toSlice (s : String) : Slice where

 /-- The number of bytes of the UTF-8 encoding of the string slice. -/
@[expose]
-def Slice.utf8ByteSize (s : Slice) : Pos.Raw :=
-  s.endExclusive.offset - s.startInclusive.offset
+def Slice.utf8ByteSize (s : Slice) : Nat :=
+  s.startInclusive.offset.byteDistance s.endExclusive.offset
+
+theorem Slice.utf8ByteSize_eq {s : Slice} :
+    s.utf8ByteSize = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
+
+instance : HAdd Pos.Raw Slice Pos.Raw where
+  hAdd p s := { byteIdx := p.byteIdx + s.utf8ByteSize }
+
+instance : HAdd Slice Pos.Raw Pos.Raw where
+  hAdd s p := { byteIdx := s.utf8ByteSize + p.byteIdx }
+
+instance : HSub Pos.Raw Slice Pos.Raw where
+  hSub p s := { byteIdx := p.byteIdx - s.utf8ByteSize }

@[simp]
-theorem Slice.byteIdx_utf8ByteSize {s : Slice} :
-    s.utf8ByteSize.byteIdx = s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx := (rfl)
+theorem Pos.Raw.byteIdx_add_slide {p : Pos.Raw} {s : Slice} :
+    (p + s).byteIdx = p.byteIdx + s.utf8ByteSize := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_slice_add {s : Slice} {p : Pos.Raw} :
+    (s + p).byteIdx = s.utf8ByteSize + p.byteIdx := rfl
+
+@[simp]
+theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
+    (p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
+
+/-- The end position of a slice, as a `Pos.Raw`. -/
+@[expose]
+def Slice.rawEndPos (s : Slice) : Pos.Raw where
+  byteIdx := s.utf8ByteSize
+
+@[simp]
+theorem Slice.byteIdx_rawEndPos {s : Slice} : s.rawEndPos.byteIdx = s.utf8ByteSize := (rfl)
+
+/--
+Offsets `p` by `offset` on the left. This is not an `HAdd` instance because it should be a
+relatively rare operation, so we use a name to make accidental use less likely. To offset a position
+by the size of a character `c` or string `s`, you can use `c + p` resp. `s + p`.
+
+This should be seen as an operation that converts relative positions into absolute positions.
+
+See also `Pos.Raw.increaseBy`, which is an "advancing" operation.
+-/
+@[expose, inline]
+def Pos.Raw.offsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
+  byteIdx := offset.byteIdx + p.byteIdx
+
+@[simp]
+theorem Pos.Raw.byteIdx_offsetBy {p : Pos.Raw} {offset : Pos.Raw} :
+    (p.offsetBy offset).byteIdx = offset.byteIdx + p.byteIdx := (rfl)
+
+/--
+Decreases `p` by `offset`. This is not an `HSub` instance because it should be a relatively
+rare operation, so we use a name to make accidental use less likely. To unoffset a position
+by the size of a character `c` or string `s`, you can use `p - c` resp. `p - s`.
+
+This should be seen as an operation that converts absolute positions into relative positions.
+
+See also `Pos.Raw.decreaseBy`, which is an "unadvancing" operation.
+-/
+@[expose, inline]
+def Pos.Raw.unoffsetBy (p : Pos.Raw) (offset : Pos.Raw) : Pos.Raw where
+  byteIdx := p.byteIdx - offset.byteIdx
+
+@[simp]
+theorem Pos.Raw.byteIdx_unoffsetBy {p : Pos.Raw} {offset : Pos.Raw} :
+    (p.unoffsetBy offset).byteIdx = p.byteIdx - offset.byteIdx := (rfl)

 /-- Criterion for validity of positions in string slices. -/
 structure Pos.Raw.IsValidForSlice (s : Slice) (p : Pos.Raw) : Prop where
-  le_utf8ByteSize : p ≤ s.utf8ByteSize
-  isValid_add : (s.startInclusive.offset + p).IsValid s.str
+  le_utf8ByteSize : p ≤ s.rawEndPos
+  isValid_offsetBy : (p.offsetBy s.startInclusive.offset).IsValid s.str

 /--
 Accesses the indicated byte in the UTF-8 encoding of a string slice.
@ -1082,10 +1169,11 @@ Accesses the indicated byte in the UTF-8 encoding of a string slice.
 At runtime, this function is implemented by efficient, constant-time code.
 -/
@[inline, expose]
-def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 :=
-  s.str.getUTF8Byte (s.startInclusive.offset + p) (by
+def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.rawEndPos) : UInt8 :=
+  s.str.getUTF8Byte (p.offsetBy s.startInclusive.offset) (by
    have := s.endExclusive.isValid.le_endPos
-    simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.le_iff, byteIdx_endPos, Pos.Raw.byteIdx_add] at *
+    simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.le_iff, byteIdx_endPos,
+      Pos.Raw.byteIdx_offsetBy] at *
    omega)

 /--
@ -1094,7 +1182,7 @@ is out-of-bounds.
 -/
@[expose]
 def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 :=
-  if h : p < s.utf8ByteSize then
+  if h : p < s.rawEndPos then
    s.getUTF8Byte p h
  else
    panic! "String slice access is out of bounds."
@ -1119,10 +1207,10 @@ theorem Slice.utf8ByteSize_copy {s : Slice} :
  rw [Nat.min_eq_left (by simpa [Pos.Raw.le_iff] using s.endExclusive.isValid.le_endPos)]

@[simp]
-theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.utf8ByteSize := by
-  simp [Pos.Raw.ext_iff]
+theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.rawEndPos := by
+  simp [Pos.Raw.ext_iff, utf8ByteSize_eq]

-theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} :
+theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.rawEndPos} :
    s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by
  simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract]

@ -1134,19 +1222,16 @@ theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} :
    (s.getUTF8Byte 0 h).IsUTF8FirstByte := by
  simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero

-@[simp]
-theorem Pos.Raw.add_zero {p : Pos.Raw} : p + 0 = p := by
-  simp [Pos.Raw.ext_iff]
-
@[simp]
 theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
    p.IsValid s.copy ↔ p.IsValidForSlice s := by
  refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
  · simpa using h₁
  · have := s.startInclusive_le_endExclusive
-    simp_all only [Slice.endPos_copy, ValidPos.le_iff, le_iff, Slice.byteIdx_utf8ByteSize]
+    simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq,
+      ValidPos.le_iff]
    rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂
-    rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] at h₂
+    rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff] at h₂
    · rcases h₂ with (h₂|⟨-, h₂⟩)
      · rw [← h₂]
        exact s.startInclusive.isValid
@ -1157,9 +1242,9 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} :
      omega
  · simpa using h₁
  · have := s.startInclusive_le_endExclusive
-    simp_all only [ValidPos.le_iff, le_iff, Slice.byteIdx_utf8ByteSize]
+    simp_all only [le_iff, Slice.byteIdx_rawEndPos, Slice.utf8ByteSize_eq, ValidPos.le_iff]
    rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)]
-    rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff]
+    rw [← byteIdx_offsetBy, Pos.Raw.isValidUTF8_extract_iff]
    · exact Or.inr ⟨s.startInclusive.isValid, h₂⟩
    · simp [le_iff]
    · have := s.endExclusive.isValid.le_endPos
@ -1191,19 +1276,39 @@ instance {s : Slice} : Inhabited s.Pos where
  default := s.startPos

@[simp]
-theorem Slice.offset_startInclusive_add_utf8ByteSize {s : Slice} :
-    s.startInclusive.offset + s.utf8ByteSize = s.endExclusive.offset := by
+theorem Slice.offset_startInclusive_add_self {s : Slice} :
+    s.startInclusive.offset + s = s.endExclusive.offset := by
  have := s.startInclusive_le_endExclusive
-  simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff]
+  simp_all [String.Pos.Raw.ext_iff, ValidPos.le_iff, Pos.Raw.le_iff, utf8ByteSize_eq]
+
+@[simp]
+theorem Pos.Raw.offsetBy_endPos_left {p : Pos.Raw} {s : String} :
+    s.endPos.offsetBy p = p + s := by
+  simp [Pos.Raw.ext_iff]
+
+@[simp]
+theorem Pos.Raw.offsetBy_endPos_right {p : Pos.Raw} {s : String} :
+    p.offsetBy s.endPos = s + p := by
+  simp [Pos.Raw.ext_iff]
+
+@[simp]
+theorem Pos.Raw.offsetBy_sliceRawEndPos_left {p : Pos.Raw} {s : Slice} :
+    s.rawEndPos.offsetBy p = p + s := by
+  simp [Pos.Raw.ext_iff]
+
+@[simp]
+theorem Pos.Raw.offsetBy_sliceRawEndPos_right {p : Pos.Raw} {s : Slice} :
+    p.offsetBy s.rawEndPos = s + p := by
+  simp [Pos.Raw.ext_iff]

 /-- The past-the-end position of `s`, as an `s.Pos`. -/
@[inline, expose]
 def Slice.endPos (s : Slice) : s.Pos where
-  offset := s.utf8ByteSize
+  offset := s.rawEndPos
  isValidForSlice := ⟨by simp [Pos.Raw.le_iff], by simpa using s.endExclusive.isValid⟩

@[simp]
-theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.utf8ByteSize := (rfl)
+theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.rawEndPos := (rfl)

 instance {s : Slice} : LE s.Pos where
  le l r := l.offset ≤ r.offset
@ -1224,16 +1329,16 @@ instance {s : Slice} (l r : s.Pos) : Decidable (l < r) :=
  decidable_of_iff' _ Slice.Pos.lt_iff

 theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} :
-    p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
+    p.IsValidForSlice s ↔ (p = s.rawEndPos ∨ (∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by
  simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy]

 /-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/
@[expose]
 def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool :=
-  if h : p < s.utf8ByteSize then
+  if h : p < s.rawEndPos then
    (s.getUTF8Byte p h).IsUTF8FirstByte
  else
-    p = s.utf8ByteSize
+    p = s.rawEndPos

@[simp]
 theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} :
@ -1255,7 +1360,7 @@ instance {s : Slice} {p : Pos.Raw} : Decidable (p.IsValidForSlice s) :=
  decidable_of_iff _ Pos.Raw.isValidForSlice_eq_true_iff

 theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar?_copy {s : Slice} {p : Pos.Raw} :
-    p.IsValidForSlice s ↔ p = s.utf8ByteSize ∨ (s.copy.bytes.utf8DecodeChar? p.byteIdx).isSome := by
+    p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (s.copy.bytes.utf8DecodeChar? p.byteIdx).isSome := by
  rw [← isValid_copy_iff, isValid_iff_isSome_utf8DecodeChar?, Slice.endPos_copy]

 theorem Slice.bytes_str_eq {s : Slice} :
@ -1269,7 +1374,7 @@ theorem Slice.bytes_str_eq {s : Slice} :
  · simpa [Pos.Raw.le_iff] using s.startInclusive_le_endExclusive

 theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos.Raw} :
-    p.IsValidForSlice s ↔ p = s.utf8ByteSize ∨ (p < s.utf8ByteSize ∧ (s.str.bytes.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
+    p.IsValidForSlice s ↔ p = s.rawEndPos ∨ (p < s.rawEndPos ∧ (s.str.bytes.utf8DecodeChar? (s.startInclusive.offset.byteIdx + p.byteIdx)).isSome) := by
  refine ⟨?_, ?_⟩
  · rw [isValidForSlice_iff_isSome_utf8DecodeChar?_copy]
    rintro (rfl|h)
@ -1315,20 +1420,20 @@ theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.
 underlying string `s.str`. -/
@[inline]
 def Slice.Pos.str {s : Slice} (pos : s.Pos) : s.str.ValidPos where
-  offset := s.startInclusive.offset + pos.offset
-  isValid := pos.isValidForSlice.isValid_add
+  offset := pos.offset.offsetBy s.startInclusive.offset
+  isValid := pos.isValidForSlice.isValid_offsetBy

@[simp]
 theorem Slice.Pos.offset_str {s : Slice} {pos : s.Pos} :
-    pos.str.offset = s.startInclusive.offset + pos.offset := (rfl)
+    pos.str.offset = pos.offset.offsetBy s.startInclusive.offset := (rfl)

@[simp]
 theorem Slice.Pos.offset_str_le_offset_endExclusive {s : Slice} {pos : s.Pos} :
    pos.str.offset ≤ s.endExclusive.offset := by
  have := pos.isValidForSlice.le_utf8ByteSize
  have := s.startInclusive_le_endExclusive
-  simp only [ValidPos.le_iff, Pos.Raw.le_iff, byteIdx_utf8ByteSize, offset_str, Pos.Raw.byteIdx_add,
-    ge_iff_le] at *
+  simp only [Pos.Raw.le_iff, byteIdx_rawEndPos, utf8ByteSize_eq, offset_str,
+    Pos.Raw.byteIdx_offsetBy, ValidPos.le_iff] at *
  omega

 theorem Slice.Pos.offset_le_offset_str {s : Slice} {pos : s.Pos} :
@ -1414,51 +1519,54 @@ def Slice.replaceStartEnd! (s : Slice) (newStart newEnd : s.Pos) : Slice :=

@[simp]
 theorem Slice.utf8ByteSize_replaceStart {s : Slice} {pos : s.Pos} :
-    (s.replaceStart pos).utf8ByteSize = s.utf8ByteSize - pos.offset := by
+    (s.replaceStart pos).utf8ByteSize = s.utf8ByteSize - pos.offset.byteIdx := by
+  simp only [utf8ByteSize_eq, str_replaceStart, endExclusive_replaceStart,
+    startInclusive_replaceStart, Pos.offset_str, Pos.Raw.byteIdx_offsetBy]
+  omega
+
+theorem Slice.rawEndPos_replaceStart {s : Slice} {pos : s.Pos} :
+    (s.replaceStart pos).rawEndPos = s.rawEndPos.unoffsetBy pos.offset := by
  ext
  simp
-  omega

@[simp]
 theorem Slice.utf8ByteSize_replaceEnd {s : Slice} {pos : s.Pos} :
-    (s.replaceEnd pos).utf8ByteSize = pos.offset := by
+    (s.replaceEnd pos).utf8ByteSize = pos.offset.byteIdx := by
+  simp [utf8ByteSize_eq]
+
+@[simp]
+theorem Slice.rawEndPos_replaceEnd {s : Slice} {pos : s.Pos} :
+    (s.replaceEnd pos).rawEndPos = pos.offset := by
  ext
  simp

@[simp]
 theorem Slice.utf8ByteSize_replaceStartEnd {s : Slice} {newStart newEnd : s.Pos} {h} :
-    (s.replaceStartEnd newStart newEnd h).utf8ByteSize = newEnd.offset - newStart.offset := by
-  ext
-  simp only [byteIdx_utf8ByteSize, str_replaceStartEnd, endExclusive_replaceStartEnd,
-    Pos.offset_str, Pos.Raw.byteIdx_add, startInclusive_replaceStartEnd, Pos.Raw.byteIdx_sub]
+    (s.replaceStartEnd newStart newEnd h).utf8ByteSize = newStart.offset.byteDistance newEnd.offset := by
+  simp [utf8ByteSize_eq, Pos.Raw.byteDistance_eq]
  omega

-theorem Pos.Raw.add_comm (a b : Pos.Raw) : a + b = b + a := by
+theorem Pos.Raw.offsetBy_assoc {p q r : Pos.Raw} :
+    (p.offsetBy q).offsetBy r = p.offsetBy (q.offsetBy r) := by
  ext
-  simpa using Nat.add_comm _ _
-
-theorem Pos.Raw.add_assoc (a b c : Pos.Raw) : a + b + c = a + (b + c) := by
-  ext
-  simpa using Nat.add_assoc _ _ _
+  simp [Nat.add_assoc]

 theorem Pos.Raw.isValidForSlice_replaceStart {s : Slice} {p : s.Pos} {off : Pos.Raw} :
-    off.IsValidForSlice (s.replaceStart p) ↔ (p.offset + off).IsValidForSlice s := by
+    off.IsValidForSlice (s.replaceStart p) ↔ (off.offsetBy p.offset).IsValidForSlice s := by
  refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩, fun ⟨h₁, h₂⟩ => ⟨?_, ?_⟩⟩
  · have := p.isValidForSlice.le_utf8ByteSize
    simp_all [le_iff]
    omega
-  · simp only [Slice.str_replaceStart, Slice.startInclusive_replaceStart, Slice.Pos.offset_str] at h₂
-    rwa [← Pos.Raw.add_assoc]
+  · simpa [Pos.Raw.offsetBy_assoc] using h₂
  · simp_all [Pos.Raw.le_iff]
    omega
-  · simp only [Slice.str_replaceStart, Slice.startInclusive_replaceStart, Slice.Pos.offset_str]
-    rwa [Pos.Raw.add_assoc]
+  · simpa [Pos.Raw.offsetBy_assoc] using h₂

 theorem Pos.Raw.isValidForSlice_replaceEnd {s : Slice} {p : s.Pos} {off : Pos.Raw} :
    off.IsValidForSlice (s.replaceEnd p) ↔ off ≤ p.offset ∧ off.IsValidForSlice s := by
  refine ⟨fun ⟨h₁, h₂⟩ => ⟨?_, ?_, ?_⟩, fun ⟨h₁, ⟨h₂, h₃⟩⟩ => ⟨?_, ?_⟩⟩
  · simpa using h₁
-  · simp only [Slice.utf8ByteSize_replaceEnd] at h₁
+  · simp only [Slice.rawEndPos_replaceEnd] at h₁
    exact Pos.Raw.le_trans h₁ p.isValidForSlice.le_utf8ByteSize
  · simpa using h₂
  · simpa using h₁
@ -1539,7 +1647,7 @@ def Slice.Pos.ofSlice {s : String} (pos : s.toSlice.Pos) : s.ValidPos where
 theorem Slice.Pos.ofset_ofSlice {s : String} {pos : s.toSlice.Pos} : pos.ofSlice.offset = pos.offset := (rfl)

@[simp]
-theorem utf8ByteSize_toSlice {s : String} : s.toSlice.utf8ByteSize = s.endPos := by
+theorem rawEndPos_toSlice {s : String} : s.toSlice.rawEndPos = s.endPos := by
  rw [← Slice.endPos_copy, copy_toSlice]

@[simp]
@ -1665,8 +1773,8 @@ theorem eq_singleton_append {s : String} (h : s.startValidPos ≠ s.endValidPos)
 theorem Slice.copy_eq_copy_replaceEnd {s : Slice} {pos : s.Pos} :
    s.copy = (s.replaceEnd pos).copy ++ (s.replaceStart pos).copy := by
  rw [← String.bytes_inj, bytes_copy, bytes_append, bytes_copy, bytes_copy]
-  simp only [str_replaceEnd, startInclusive_replaceEnd, endExclusive_replaceEnd,
-    Slice.Pos.offset_str, Pos.Raw.byteIdx_add, str_replaceStart, startInclusive_replaceStart,
+  simp only [str_replaceEnd, startInclusive_replaceEnd, endExclusive_replaceEnd, Pos.offset_str,
+    Pos.Raw.byteIdx_offsetBy, str_replaceStart, startInclusive_replaceStart,
    endExclusive_replaceStart, ByteArray.extract_append_extract, Nat.le_add_right, Nat.min_eq_left]
  rw [Nat.max_eq_right]
  exact pos.offset_str_le_offset_endExclusive
@ -1734,27 +1842,40 @@ theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} :
 /-- Given a position in `s.replaceStart p₀`, obtain the corresponding position in `s`. -/
@[inline]
 def Slice.Pos.ofReplaceStart {s : Slice} {p₀ : s.Pos} (pos : (s.replaceStart p₀).Pos) : s.Pos where
-  offset := p₀.offset + pos.offset
+  offset := pos.offset.offsetBy p₀.offset
  isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.1 pos.isValidForSlice

@[simp]
 theorem Slice.Pos.offset_ofReplaceStart {s : Slice} {p₀ : s.Pos} {pos : (s.replaceStart p₀).Pos} :
-    (ofReplaceStart pos).offset = p₀.offset + pos.offset := (rfl)
+    (ofReplaceStart pos).offset = pos.offset.offsetBy p₀.offset := (rfl)
+
+theorem Pos.Raw.offsetBy_unoffsetBy_of_le {p : Pos.Raw} {q : Pos.Raw} (h : q ≤ p) :
+    (p.unoffsetBy q).offsetBy q = p := by
+  ext
+  simp_all [le_iff]
+
+@[simp]
+theorem Pos.Raw.unoffsetBy_offsetBy {p q : Pos.Raw} : (p.offsetBy q).unoffsetBy q = p := by
+  ext
+  simp

 /-- Given a position in `s` that is at least `p₀`, obtain the corresponding position in
 `s.replaceStart p₀`. -/
@[inline]
 def Slice.Pos.toReplaceStart {s : Slice} (p₀ : s.Pos) (pos : s.Pos) (h : p₀.offset ≤ pos.offset) :
    (s.replaceStart p₀).Pos where
-  offset := pos.offset - p₀.offset
+  offset := pos.offset.unoffsetBy p₀.offset
  isValidForSlice := Pos.Raw.isValidForSlice_replaceStart.2 (by
-    have : p₀.offset + (pos.offset - p₀.offset) = pos.offset := by
-      simp_all [Pos.Raw.le_iff, String.Pos.Raw.ext_iff]
-    simpa [this] using pos.isValidForSlice)
+    simpa [Pos.Raw.offsetBy_unoffsetBy_of_le (Pos.Raw.le_iff.1 h)] using pos.isValidForSlice)

@[simp]
 theorem Slice.Pos.offset_toReplaceStart {s : Slice} {p₀ : s.Pos} {pos : s.Pos} {h} :
-    (toReplaceStart p₀ pos h).offset = pos.offset - p₀.offset := (rfl)
+    (toReplaceStart p₀ pos h).offset = pos.offset.unoffsetBy p₀.offset := (rfl)
+
+@[simp]
+theorem Pos.Raw.offsetBy_zero_left {p : Pos.Raw} : (0 : Pos.Raw).offsetBy p = p := by
+  ext
+  simp

@[simp]
 theorem Slice.Pos.ofReplaceStart_startPos {s : Slice} {pos : s.Pos} :
@ -1783,19 +1904,58 @@ theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.en
  rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd]

 theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} :
-    (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by
+    (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = (pos.get h).utf8Size := by
  simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar]

+/--
+Advances `p` by `n` bytes. This is not an `HAdd` instance because it should be a relatively
+rare operation, so we use a name to make accidental use less likely. To add the size of a
+character `c` or string `s` to a raw position `p`, you can use `p + c` resp. `p + s`.
+
+This should be seen as an "advance" or "skip".
+
+See also `Pos.Raw.offsetBy`, which turns relative positions into absolute positions.
+-/
+@[expose, inline]
+def Pos.Raw.increaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
+  byteIdx := p.byteIdx + n
+
+@[simp]
+theorem Pos.Raw.byteIdx_increaseBy {p : Pos.Raw} {n : Nat} :
+    (p.increaseBy n).byteIdx = p.byteIdx + n := (rfl)
+
+/--
+Moves the position `p` back by `n` bytes. This is not an `HSub` instance because it should be a
+relatively rare operation, so we use a name to make accidental use less likely. To remove the size
+of a character `c` or string `s` from a raw position `p`, you can use `p - c` resp. `p - s`.
+
+This should be seen as the inverse of an "advance" or "skip".
+
+See also `Pos.Raw.unoffsetBy`, which turns absolute positions into relative positions.
+-/
+@[expose, inline]
+def Pos.Raw.decreaseBy (p : Pos.Raw) (n : Nat) : Pos.Raw where
+  byteIdx := p.byteIdx - n
+
+@[simp]
+theorem Pos.Raw.byteIdx_decreaseBy {p : Pos.Raw} {n : Nat} :
+    (p.decreaseBy n).byteIdx = p.byteIdx - n := (rfl)
+
+theorem Pos.Raw.increaseBy_charUtf8Size {p : Pos.Raw} {c : Char} :
+    p.increaseBy c.utf8Size = p + c := by
+  simp [Pos.Raw.ext_iff]
+
 /-- Advances a valid position on a slice to the next valid position, given a proof that the
 position is not the past-the-end position, which guarantees that such a position exists. -/
@[expose]
 def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where
-  offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte
+  offset := pos.offset.increaseBy ((pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte)
  isValidForSlice := by
    obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h
-    replace ht' : pos.offset = ⟨t₁.utf8ByteSize⟩ := Eq.symm (String.Pos.Raw.ext ht')
+    replace ht' : pos.offset = t₁.endPos := Eq.symm (String.Pos.Raw.ext ht')
    rw [utf8ByteSize_byte, ← Pos.Raw.isValid_copy_iff, ht, ht']
    refine Pos.Raw.IsValid.append_right ?_ t₂
+    rw [Pos.Raw.increaseBy_charUtf8Size]
    refine Pos.Raw.IsValid.append_left ?_ t₁
    exact Pos.Raw.isValid_singleton.2 (Or.inr rfl)

@ -1841,10 +2001,10 @@ theorem Pos.Raw.byteIdx_dec {p : Pos.Raw} : p.dec.byteIdx = p.byteIdx - 1 := (rf
 def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : String.Pos.Raw :=
  go (pos.offset.byteIdx - 1) (by
    have := pos.isValidForSlice.le_utf8ByteSize
-    simp [Pos.Raw.le_iff, Pos.Raw.lt_iff, Pos.ext_iff] at ⊢ this h
+    simp [Pos.Raw.le_iff, Pos.ext_iff] at ⊢ this h
    omega)
 where
-  go (off : Nat) (h₁ : ⟨off⟩ < s.utf8ByteSize) : String.Pos.Raw :=
+  go (off : Nat) (h₁ : off < s.utf8ByteSize) : String.Pos.Raw :=
    if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then
      ⟨off⟩
    else
@ -1854,10 +2014,10 @@ where
        simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte
      match off with
      | 0 => False.elim (by contradiction)
-      | off + 1 => go off (by simp [Pos.Raw.lt_iff] at ⊢ h₁; omega)
+      | off + 1 => go off (by omega)
  termination_by structural off

-theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : ⟨off⟩ < s.utf8ByteSize) :
+theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : off < s.utf8ByteSize) :
    (Slice.Pos.prevAux.go off h₁).IsValidForSlice s := by
  induction off with
  | zero =>
@ -2003,20 +2163,20 @@ theorem ValidPos.cast_rfl {s : String} {pos : s.ValidPos} : pos.cast rfl = pos :
 /-- Given a byte position within a string slice, obtains the smallest valid position that is
 strictly greater than the given byte position. -/
@[inline]
-def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.utf8ByteSize) : s.Pos :=
+def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.rawEndPos) : s.Pos :=
  go offset.inc
 where
  go (offset : String.Pos.Raw) : s.Pos :=
-    if h : offset < s.utf8ByteSize then
+    if h : offset < s.rawEndPos then
      if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then
        s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩))
      else
        go offset.inc
    else
      s.endPos
-  termination_by s.utf8ByteSize.byteIdx - offset.byteIdx
+  termination_by s.utf8ByteSize - offset.byteIdx
  decreasing_by
-    simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.byteIdx_inc, gt_iff_lt] at h ⊢
+    simp only [Pos.Raw.lt_iff, byteIdx_rawEndPos, utf8ByteSize_eq, Pos.Raw.byteIdx_inc] at h ⊢
    omega

@[simp]
@ -2028,7 +2188,7 @@ theorem Pos.Raw.le_of_lt {p q : Pos.Raw} : p < q → p ≤ q := by simpa [lt_iff

 theorem Pos.Raw.inc_le {p q : Pos.Raw} : p.inc ≤ q ↔ p < q := by simpa [lt_iff, le_iff] using Nat.succ_le

-private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.utf8ByteSize) :
+private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (h : o ≤ s.rawEndPos) :
    o ≤ (findNextPos.go s o).offset := by
  fun_induction findNextPos.go with
  | case1 => simp
@ -2041,7 +2201,7 @@ private theorem Slice.le_offset_findNextPosGo {s : Slice} {o : String.Pos.Raw} (
 theorem Slice.lt_offset_findNextPos {s : Slice} {o : String.Pos.Raw} (h) : o < (s.findNextPos o h).offset :=
  Pos.Raw.lt_of_lt_of_le Pos.Raw.lt_inc (le_offset_findNextPosGo (Pos.Raw.inc_le.2 h))

-theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : ⟨p⟩ < s.utf8ByteSize} :
+theorem Slice.Pos.prevAuxGo_le_self {s : Slice} {p : Nat} {h : p < s.utf8ByteSize} :
    prevAux.go p h ≤ ⟨p⟩ := by
  induction p with
  | zero =>
@ -2066,14 +2226,14 @@ theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.
  simp [Pos.ext_iff, Pos.Raw.lt_iff] at *
  omega

-theorem Slice.Pos.prevAux_lt_utf8ByteSize {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.utf8ByteSize :=
+theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
  Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize

 theorem Pos.Raw.ne_of_lt {a b : Pos.Raw} : a < b → a ≠ b := by
  simpa [lt_iff, Pos.Raw.ext_iff] using Nat.ne_of_lt

 theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
-  simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_utf8ByteSize
+  simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_rawEndPos

 theorem Slice.Pos.offset_prev_lt_offset {s : Slice} {p : s.Pos} {h} : (p.prev h).offset < p.offset := by
  simpa [prev] using prevAux_lt_self
@ -2552,12 +2712,12 @@ def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : L
      let i := s.next i
      let j := sep.next j
      if sep.atEnd j then
-        splitOnAux s sep i i 0 (s.extract b (i - j)::r)
+        splitOnAux s sep i i 0 (s.extract b (i.unoffsetBy j)::r)
      else
        splitOnAux s sep b i j r
    else
-      splitOnAux s sep b (s.next (i - j)) 0 r
-termination_by (s.endPos.1 - (i - j).1, sep.endPos.1 - j.1)
+      splitOnAux s sep b (s.next (i.unoffsetBy j)) 0 r
+termination_by (s.endPos.1 - (j.byteDistance i), sep.endPos.1 - j.1)
 decreasing_by
  focus
    rename_i h _ _
@ -2566,7 +2726,7 @@ decreasing_by
      (Nat.lt_of_le_of_lt (Nat.sub_le ..) (lt_next s _))
  focus
    rename_i i₀ j₀ _ eq h'
-    rw [show (s.next i₀ - sep.next j₀).1 = (i₀ - j₀).1 by
+    rw [show (sep.next j₀).byteDistance (s.next i₀) = j₀.byteDistance i₀ by
      change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _
      rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl]
    right; exact Nat.sub_lt_sub_left
@ -3200,7 +3360,7 @@ position in the underlying string, the fallback value `(default : Char)`, which
 returned.  Does not panic.
 -/
@[inline] def get : Substring → String.Pos.Raw → Char
-  | ⟨s, b, _⟩, p => s.get (b+p)
+  | ⟨s, b, _⟩, p => s.get (p.offsetBy b)  -- `p.offsetBy b` has byte index `b.byteIdx + p.byteIdx`, i.e. `p` shifted by `b`

@[export lean_substring_get]
 def Internal.getImpl : Substring → String.Pos.Raw → Char :=
@ -3215,7 +3375,7 @@ position, not the underlying string.
 -/
@[inline] def next : Substring → String.Pos.Raw → String.Pos.Raw
  | ⟨s, b, e⟩, p =>
-    let absP := b+p
+    let absP := p.offsetBy b  -- `p` shifted by `b` (byte index `b.byteIdx + p.byteIdx`), used to index into `s`
    if absP = e then p else { byteIdx := (s.next absP).byteIdx - b.byteIdx }

 theorem lt_next (s : Substring) (i : String.Pos.Raw) (h : i.1 < s.bsize) :
@ -3236,7 +3396,7 @@ position, not the underlying string.
 -/
@[inline] def prev : Substring → String.Pos.Raw → String.Pos.Raw
  | ⟨s, b, _⟩, p =>
-    let absP := b+p
+    let absP := p.offsetBy b  -- `p` shifted by `b` (byte index `b.byteIdx + p.byteIdx`), used to index into `s`
    if absP = b then p else { byteIdx := (s.prev absP).byteIdx - b.byteIdx }

@[export lean_substring_prev]
@ -3295,7 +3455,7 @@ by advancing its start position.
 If the substring's end position is reached, the start position is not advanced past it.
 -/
@[inline] def drop : Substring → Nat → Substring
-  | ss@⟨s, b, e⟩, n => ⟨s, b + ss.nextn n 0, e⟩
+  | ss@⟨s, b, e⟩, n => ⟨s, (ss.nextn n 0).offsetBy b, e⟩  -- new second component: the result of `ss.nextn n 0` shifted by `b`

@[export lean_substring_drop]
 def Internal.dropImpl : Substring → Nat → Substring :=
@ -3308,7 +3468,7 @@ by moving its end position towards its start position.
 If the substring's start position is reached, the end position is not retracted past it.
 -/
@[inline] def dropRight : Substring → Nat → Substring
-  | ss@⟨s, b, _⟩, n => ⟨s, b, b + ss.prevn n ⟨ss.bsize⟩⟩
+  | ss@⟨s, b, _⟩, n => ⟨s, b, (ss.prevn n ⟨ss.bsize⟩).offsetBy b⟩  -- new third component: the result of `ss.prevn n ⟨ss.bsize⟩` shifted by `b`

 /--
 Retains only the specified number of characters (Unicode code points) at the beginning of a
@ -3317,7 +3477,7 @@ substring, by moving its end position towards its start position.
 If the substring's start position is reached, the end position is not retracted past it.
 -/
@[inline] def take : Substring → Nat → Substring
-  | ss@⟨s, b, _⟩, n => ⟨s, b, b + ss.nextn n 0⟩
+  | ss@⟨s, b, _⟩, n => ⟨s, b, (ss.nextn n 0).offsetBy b⟩  -- new third component: the result of `ss.nextn n 0` shifted by `b`

 /--
 Retains only the specified number of characters (Unicode code points) at the end of a substring, by
@ -3326,7 +3486,7 @@ moving its start position towards its end position.
 If the substring's end position is reached, the start position is not advanced past it.
 -/
@[inline] def takeRight : Substring → Nat → Substring
-  | ss@⟨s, b, e⟩, n => ⟨s, b + ss.prevn n ⟨ss.bsize⟩, e⟩
+  | ss@⟨s, b, e⟩, n => ⟨s, (ss.prevn n ⟨ss.bsize⟩).offsetBy b, e⟩  -- new second component: the result of `ss.prevn n ⟨ss.bsize⟩` shifted by `b`

 /--
 Checks whether a position in a substring is precisely equal to its ending position.
@ -3335,7 +3495,7 @@ The position is understood relative to the substring's starting position, rather
 string's starting position.
 -/
@[inline] def atEnd : Substring → String.Pos.Raw → Bool
-  | ⟨_, b, e⟩, p => b + p == e
+  | ⟨_, b, e⟩, p => p.offsetBy b == e  -- compares `p` shifted by `b` (byte index `b.byteIdx + p.byteIdx`) with `e`

 /--
 Returns the region of the substring delimited by the provided start and stop positions, as a
@ -3347,7 +3507,7 @@ If the resulting substring is empty, then the resulting substring is a substring
 positions adjusted.
 -/
@[inline] def extract : Substring → String.Pos.Raw → String.Pos.Raw → Substring
-  | ⟨s, b, e⟩, b', e' => if b' ≥ e' then ⟨"", 0, 0⟩ else ⟨s, e.min (b+b'), e.min (b+e')⟩
+  | ⟨s, b, e⟩, b', e' => if b' ≥ e' then ⟨"", 0, 0⟩ else ⟨s, e.min (b'.offsetBy b), e.min (e'.offsetBy b)⟩  -- both bounds are shifted by `b`, then combined with `e` via `e.min`

@[export lean_substring_extract]
 def Internal.extractImpl : Substring → String.Pos.Raw → String.Pos.Raw → Substring :=
@ -3372,14 +3532,14 @@ def splitOn (s : Substring) (sep : String := " ") : List Substring :=
          let i := s.next i
          let j := sep.next j
          if sep.atEnd j then
-            loop i i 0 (s.extract b (i-j) :: r)
+            loop i i 0 (s.extract b (i.unoffsetBy j) :: r)
          else
            loop b i j r
        else
          loop b (s.next i) 0 r
      else
        let r := if sep.atEnd j then
-          "".toSubstring :: s.extract b (i-j) :: r
+          "".toSubstring :: s.extract b (i.unoffsetBy j) :: r
        else
          s.extract b i :: r
        r.reverse
@ -4058,13 +4218,15 @@ theorem byteIdx_mk (n : Nat) : byteIdx ⟨n⟩ = n := rfl

@[simp] theorem mk_byteIdx (p : Pos.Raw) : ⟨p.byteIdx⟩ = p := rfl

-@[simp] theorem add_byteIdx (p₁ p₂ : Pos.Raw) : (p₁ + p₂).byteIdx = p₁.byteIdx + p₂.byteIdx := rfl
+@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
+theorem add_byteIdx {p₁ p₂ : Pos.Raw} : (p₂.offsetBy p₁).byteIdx = p₁.byteIdx + p₂.byteIdx := by  -- the former `p₁ + p₂` is restated as `p₂.offsetBy p₁`; no longer `@[simp]`
+  simp

-theorem add_eq (p₁ p₂ : Pos.Raw) : p₁ + p₂ = ⟨p₁.byteIdx + p₂.byteIdx⟩ := rfl
+@[deprecated byteIdx_offsetBy (since := "2025-10-08")]
+theorem add_eq {p₁ p₂ : Pos.Raw} : p₂.offsetBy p₁ = ⟨p₁.byteIdx + p₂.byteIdx⟩ := rfl  -- the former `p₁ + p₂` is restated as `p₂.offsetBy p₁`; still closed by `rfl`

-@[simp] theorem sub_byteIdx (p₁ p₂ : Pos.Raw) : (p₁ - p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl
-
-theorem sub_eq (p₁ p₂ : Pos.Raw) : p₁ - p₂ = ⟨p₁.byteIdx - p₂.byteIdx⟩ := rfl
+@[deprecated byteIdx_unoffsetBy (since := "2025-10-08")]
+theorem sub_byteIdx (p₁ p₂ : Pos.Raw) : (p₁.unoffsetBy p₂).byteIdx = p₁.byteIdx - p₂.byteIdx := rfl  -- the former `p₁ - p₂` is restated as `p₁.unoffsetBy p₂`
+
+-- Kept as a deprecated alias (as is done for `add_eq`) so that existing uses get a
+-- deprecation warning instead of an unknown-identifier error.
+@[deprecated byteIdx_unoffsetBy (since := "2025-10-08")]
+theorem sub_eq (p₁ p₂ : Pos.Raw) : p₁.unoffsetBy p₂ = ⟨p₁.byteIdx - p₂.byteIdx⟩ := rfl

@[simp] theorem addChar_byteIdx (p : Pos.Raw) (c : Char) : (p + c).byteIdx = p.byteIdx + c.utf8Size := rfl

@ -4136,7 +4298,7 @@ open String

 namespace Substring

-@[simp] theorem prev_zero (s : Substring) : s.prev 0 = 0 := by simp [prev, Pos.Raw.add_eq, Pos.Raw.byteIdx_zero]
+@[simp] theorem prev_zero (s : Substring) : s.prev 0 = 0 := by simp [prev]  -- the deprecated `Pos.Raw.add_eq` is no longer passed to `simp`

@[simp] theorem prevn_zero (s : Substring) : ∀ n, s.prevn n 0 = 0
  | 0 => rfl
--- a/src/Init/Data/String/Decode.lean
+++ b/src/Init/Data/String/Decode.lean
@ -1424,22 +1424,22 @@ public theorem isUTF8FirstByte_getElem_zero_utf8EncodeChar {c : Char} :
  simp

@[expose]
-public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : String.Pos.Raw :=
+public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : Nat :=  -- the result is a number of bytes, so it is a `Nat` rather than a position
  if c &&& 0x80 = 0 then
-    ⟨1⟩
+    1  -- high bit clear
  else if c &&& 0xe0 = 0xc0 then
-    ⟨2⟩
+    2  -- top three bits are `110`
  else if c &&& 0xf0 = 0xe0 then
-    ⟨3⟩
+    3  -- top four bits are `1110`
  else
-    ⟨4⟩
+    4  -- every remaining case

-def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → String.Pos.Raw
-  | .invalid => ⟨0⟩
-  | .done => ⟨1⟩
-  | .oneMore => ⟨2⟩
-  | .twoMore => ⟨3⟩
-  | .threeMore => ⟨4⟩
+def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → Nat  -- one size per `FirstByte` constructor, now a plain `Nat`
+  | .invalid => 0  -- an invalid first byte is assigned size 0
+  | .done => 1
+  | .oneMore => 2
+  | .twoMore => 3
+  | .threeMore => 4

 theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUTF8FirstByte} :
    c.utf8ByteSize h = (parseFirstByte c).utf8ByteSize := by
@ -1477,9 +1477,9 @@ public theorem ByteArray.isUTF8FirstByte_of_validateUTF8At  {b : ByteArray} {i :
  simp only [validateUTF8At_eq_isSome_utf8DecodeChar?]
  exact isUTF8FirstByte_of_isSome_utf8DecodeChar?

-theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
-    (((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
-      UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by
+theorem Char.utf8ByteSize_getElem_utf8EncodeChar {c : Char} :
+    ((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize
+      UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar = c.utf8Size := by
  rw [UInt8.utf8ByteSize_eq_utf8ByteSize_parseFirstByte]
  obtain (hc|hc|hc|hc) := c.utf8Size_eq
  · rw [parseFirstByte_utf8EncodeChar_eq_done hc, FirstByte.utf8ByteSize, hc]
@ -1489,7 +1489,7 @@ theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} :

 public theorem ByteArray.utf8Size_utf8DecodeChar {b : ByteArray} {i} {h} :
    (utf8DecodeChar b i h).utf8Size =
-      ((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by
-  rw [← Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar]
+      (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h) := by  -- `utf8ByteSize` is a `Nat` now, so the `.byteIdx` projection is gone
+  rw [← Char.utf8ByteSize_getElem_utf8EncodeChar]  -- rewrites with the renamed lemma (formerly `Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar`)
  simp only [List.getElem_eq_getElem_toByteArray, utf8EncodeChar_utf8DecodeChar]
  simp [ByteArray.getElem_extract]
--- a/src/Init/Data/String/Pattern/Basic.lean
+++ b/src/Init/Data/String/Pattern/Basic.lean
@ -76,8 +76,8 @@ namespace Internal

@[extern "lean_slice_memcmp"]
 def memcmp (lhs rhs : @& Slice) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
-    (len : @& String.Pos.Raw) (h1 : lstart + len ≤ lhs.utf8ByteSize)
-    (h2 : rstart + len ≤ rhs.utf8ByteSize) : Bool :=
+    (len : @& String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
+    (h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
  go 0
 where
  go (curr : String.Pos.Raw) : Bool :=
@ -88,7 +88,7 @@ where
      have hr := by
        simp [Pos.Raw.le_iff] at h h2 ⊢
        omega
-      if lhs.getUTF8Byte (lstart + curr) hl == rhs.getUTF8Byte (rstart + curr) hr then
+      if lhs.getUTF8Byte (curr.offsetBy lstart) hl == rhs.getUTF8Byte (curr.offsetBy rstart) hr then
        go curr.inc
      else
        false
--- a/src/Init/Data/String/Pattern/Char.lean
+++ b/src/Init/Data/String/Pattern/Char.lean
@ -61,7 +61,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardCharSearcher s) Id (Search

 def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s) Id where
  rel := InvImage WellFoundedRelation.rel
-      (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
+      (fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    simp_wf
--- a/src/Init/Data/String/Pattern/Pred.lean
+++ b/src/Init/Data/String/Pattern/Pred.lean
@ -63,7 +63,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardCharPredSearcher s) Id (Se

 def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearcher s) Id where
  rel := InvImage WellFoundedRelation.rel
-      (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
+      (fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    simp_wf
--- a/src/Init/Data/String/Pattern/String.lean
+++ b/src/Init/Data/String/Pattern/String.lean
@ -33,12 +33,12 @@ partial def buildTable (pat : Slice) : Array String.Pos.Raw :=
  if pat.utf8ByteSize == 0 then
    #[]
  else
-    let arr := Array.emptyWithCapacity pat.utf8ByteSize.byteIdx
+    let arr := Array.emptyWithCapacity pat.utf8ByteSize
    let arr := arr.push 0
    go ⟨1⟩ arr
 where
  go (pos : String.Pos.Raw) (table : Array String.Pos.Raw) :=
-    if h : pos < pat.utf8ByteSize then
+    if h : pos < pat.rawEndPos then
      let patByte := pat.getUTF8Byte pos h
      let distance := computeDistance table[table.size - 1]! patByte table
      let distance := if patByte = pat.getUTF8Byte! distance then distance.inc else distance
@ -77,7 +77,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
      | .proper needle table stackPos needlePos =>
        (∃ newStackPos newNeedlePos,
          stackPos < newStackPos ∧
-          newStackPos ≤ s.utf8ByteSize ∧
+          newStackPos ≤ s.rawEndPos ∧
          it'.internalState = .proper needle table newStackPos newNeedlePos) ∨
        it'.internalState = .atEnd
      | .atEnd => False
@ -94,7 +94,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
    | .proper needle table stackPos needlePos =>
      let rec findNext (startPos : String.Pos.Raw)
          (currStackPos : String.Pos.Raw) (needlePos : String.Pos.Raw) (h : stackPos ≤ currStackPos) :=
-        if h1 : currStackPos < s.utf8ByteSize then
+        if h1 : currStackPos < s.rawEndPos then
          let stackByte := s.getUTF8Byte currStackPos h1
          let needlePos := backtrackIfNecessary needle table stackByte needlePos
          let patByte := needle.getUTF8Byte! needlePos
@ -115,7 +115,7 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
            ⟨.yield ⟨.proper needle table nextStackPos needlePos⟩ res, hiter⟩
          else
            let needlePos := needlePos.inc
-            if needlePos == needle.utf8ByteSize then
+            if needlePos == needle.rawEndPos then
              let nextStackPos := currStackPos.inc
              let res := .matched (s.pos! startPos) (s.pos! nextStackPos)
              have hiter := by
@ -135,12 +135,12 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
                omega
              findNext startPos currStackPos.inc needlePos hinv
        else
-          if startPos != s.utf8ByteSize then
+          if startPos != s.rawEndPos then
            let res := .rejected (s.pos! startPos) (s.pos! currStackPos)
            ⟨.yield ⟨.atEnd⟩ res, by simp⟩
          else
            ⟨.done, by simp⟩
-        termination_by s.utf8ByteSize.byteIdx - currStackPos.byteIdx
+        termination_by s.utf8ByteSize - currStackPos.byteIdx
        decreasing_by
          simp at h1 ⊢
          omega
@ -149,8 +149,8 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc
    | .atEnd => pure ⟨.done, by simp⟩

 private def toPair : ForwardSliceSearcher s → (Nat × Nat)
-  | .empty pos => (1, s.utf8ByteSize.byteIdx - pos.offset.byteIdx)
-  | .proper _ _ sp _ => (1, s.utf8ByteSize.byteIdx - sp.byteIdx)
+  | .empty pos => (1, s.utf8ByteSize - pos.offset.byteIdx)  -- second component: byte size of `s` minus the byte index of `pos`
+  | .proper _ _ sp _ => (1, s.utf8ByteSize - sp.byteIdx)  -- second component: byte size of `s` minus the byte index of `sp`
  | .atEnd => (0, 0)

 private instance : WellFoundedRelation (ForwardSliceSearcher s) where
@ -213,14 +213,14 @@ def startsWith (s : Slice) (pat : Slice) : Bool :=
      omega
    have hp := by
      simp [Pos.Raw.le_iff]
-    Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.utf8ByteSize hs hp
+    Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
  else
    false

@[inline]
 def dropPrefix? (s : Slice) (pat : Slice) : Option Slice :=
  if startsWith s pat then
-    some <| s.replaceStart <| s.pos! <| s.startPos.offset + pat.utf8ByteSize
+    some <| s.replaceStart <| s.pos! <| pat.rawEndPos.offsetBy s.startPos.offset  -- new start: `pat.rawEndPos` shifted by the start offset of `s`
  else
    none

@ -242,21 +242,21 @@ namespace BackwardSliceSearcher
@[inline]
 def endsWith (s : Slice) (pat : Slice) : Bool :=
  if h : pat.utf8ByteSize ≤ s.utf8ByteSize then
-    let sStart := s.endPos.offset - pat.utf8ByteSize
+    let sStart := s.endPos.offset.unoffsetBy pat.rawEndPos  -- end offset of `s` moved back by `pat.rawEndPos` (byte indices are subtracted)
    let patStart := pat.startPos.offset
    have hs := by
      simp [sStart, Pos.Raw.le_iff] at h ⊢
      omega
    have hp := by
      simp [patStart, Pos.Raw.le_iff] at h ⊢
-    Internal.memcmp s pat sStart patStart pat.utf8ByteSize hs hp
+    Internal.memcmp s pat sStart patStart pat.rawEndPos hs hp  -- `memcmp` takes its length as a `String.Pos.Raw`, so `pat.rawEndPos` is passed
  else
    false

@[inline]
 def dropSuffix? (s : Slice) (pat : Slice) : Option Slice :=
  if endsWith s pat then
-    some <| s.replaceEnd <| s.pos! <| s.endPos.offset - pat.utf8ByteSize
+    some <| s.replaceEnd <| s.pos! <| s.endPos.offset.unoffsetBy pat.rawEndPos  -- new end: end offset of `s` moved back by `pat.rawEndPos`
  else
    none

--- a/src/Init/Data/String/Slice.lean
+++ b/src/Init/Data/String/Slice.lean
@ -61,7 +61,7 @@ def beq (s1 s2 : Slice) : Bool :=
  if h : s1.utf8ByteSize = s2.utf8ByteSize then
    have h1 := by simp [h, String.Pos.Raw.le_iff]
    have h2 := by simp [h, String.Pos.Raw.le_iff]
-    Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.utf8ByteSize h1 h2
+    Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
  else
    false

@ -687,7 +687,7 @@ def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool :=
  s1.utf8ByteSize == s2.utf8ByteSize && go s1 s1.startPos.offset s2 s2.startPos.offset
 where
  go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool :=
-    if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then
+    if h : s1Curr < s1.rawEndPos ∧ s2Curr < s2.rawEndPos then
      let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower
      let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower
      if c1 == c2 then
@ -695,7 +695,7 @@ where
      else
        false
    else
-      s1Curr == s1.utf8ByteSize && s2Curr == s2.utf8ByteSize
+      s1Curr == s1.rawEndPos && s2Curr == s2.rawEndPos
  termination_by s1.endPos.offset.byteIdx - s1Curr.byteIdx
  decreasing_by
    simp at h ⊢
@ -740,7 +740,7 @@ instance [Pure m] :
 private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (PosIterator s) m where
  rel := InvImage WellFoundedRelation.rel
-      (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
+      (fun it => s.utf8ByteSize - it.internalState.currPos.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    simp_wf
@ -897,14 +897,14 @@ namespace ByteIterator
 instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
  IsPlausibleStep it
    | .yield it' out =>
-      ∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize,
+      ∃ h1 : it.internalState.offset < it.internalState.s.rawEndPos,
        it.internalState.s = it'.internalState.s ∧
        it'.internalState.offset = it.internalState.offset.inc ∧
        it.internalState.s.getUTF8Byte it.internalState.offset h1 = out
    | .skip _ => False
-    | .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize
+    | .done => ¬ it.internalState.offset < it.internalState.s.rawEndPos
  step := fun ⟨s, offset⟩ =>
-    if h : offset < s.utf8ByteSize then
+    if h : offset < s.rawEndPos then
      pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset h), by simp [h]⟩
    else
      pure ⟨.done, by simp [h]⟩
@ -912,7 +912,7 @@ instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
 private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (ByteIterator) m where
  rel := InvImage WellFoundedRelation.rel
-      (fun it => it.internalState.s.utf8ByteSize.byteIdx - it.internalState.offset.byteIdx)
+      (fun it => it.internalState.s.utf8ByteSize - it.internalState.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    simp_wf
@ -951,7 +951,7 @@ end ByteIterator
 structure RevByteIterator where
  s : Slice
  offset : String.Pos.Raw
-  hinv : offset ≤ s.utf8ByteSize
+  hinv : offset ≤ s.rawEndPos  -- invariant: `offset` does not exceed the raw end position of `s`

 set_option doc.verso false
 /--
@ -977,7 +977,7 @@ namespace RevByteIterator
 instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
  IsPlausibleStep it
    | .yield it' out =>
-      ∃ h1 : it.internalState.offset.dec < it.internalState.s.utf8ByteSize,
+      ∃ h1 : it.internalState.offset.dec < it.internalState.s.rawEndPos,
        it.internalState.s = it'.internalState.s ∧
        it.internalState.offset ≠ 0 ∧
        it'.internalState.offset = it.internalState.offset.dec ∧
--- a/src/Lean/Data/Position.lean
+++ b/src/Lean/Data/Position.lean
@ -95,7 +95,7 @@ partial def toPosition (fmap : FileMap) (pos : String.Pos.Raw) : Position :=
      -- Some systems like the delaborator use synthetic positions without an input file,
      -- which would violate `toPositionAux`'s invariant.
      -- Can also happen with EOF errors, which are not strictly inside the file.
-      ⟨fmap.getLastLine, (pos - ps.back!).byteIdx⟩
+      ⟨fmap.getLastLine, ps.back!.byteDistance pos⟩

-/-- Convert a `Lean.Position` to a `String.Pos`. -/
+/-- Convert a `Lean.Position` to a `String.Pos.Raw`. -/
 def ofPosition (text : FileMap) (pos : Position) : String.Pos.Raw :=
--- a/src/Lean/DocString/Add.lean
+++ b/src/Lean/DocString/Add.lean
@ -45,7 +45,7 @@ def validateDocComment
  for (⟨start, stop⟩, err) in errs do
    -- Report errors at their actual location if possible
    if let some pos := pos? then
-      let urlStx : Syntax := .atom (.synthetic (pos + start) (pos + stop)) (str.extract start stop)
+      let urlStx : Syntax := .atom (.synthetic (start.offsetBy pos) (stop.offsetBy pos)) (str.extract start stop)
      logErrorAt urlStx err
    else
      logError err
--- a/src/Lean/DocString/Extension.lean
+++ b/src/Lean/DocString/Extension.lean
@ -215,11 +215,11 @@ def getModuleDoc? (env : Environment) (moduleName : Name) : Option (Array Module
 def getDocStringText [Monad m] [MonadError m] (stx : TSyntax `Lean.Parser.Command.docComment) : m String :=
  match stx.raw[1] with
  | Syntax.atom _ val =>
-    return val.extract 0 (val.endPos - ⟨2⟩)
+    return val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
  | Syntax.node _ `Lean.Parser.Command.versoCommentBody _ =>
    match stx.raw[1][0] with
    | Syntax.atom _ val =>
-      return val.extract 0 (val.endPos - ⟨2⟩)
+      return val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
    | _ =>
      throwErrorAt stx "unexpected doc string{indentD stx}"
  | _ =>
--- a/src/Lean/DocString/Parser.lean
+++ b/src/Lean/DocString/Parser.lean
@ -689,11 +689,11 @@ mutual
            let info : SourceInfo :=
              match info with
              | .none => .none
-              | .synthetic start stop c => .synthetic (start + ⟨1⟩) (stop - ⟨1⟩) c
+              | .synthetic start stop c => .synthetic (start.offsetBy ⟨1⟩) (stop.unoffsetBy ⟨1⟩) c
              | .original leading start trailing stop =>
                .original
-                  {leading with stopPos := leading.stopPos + ⟨1⟩} (start + ⟨1⟩)
-                  {trailing with startPos := trailing.startPos - ⟨1⟩} (stop - ⟨1⟩)
+                  {leading with stopPos := leading.stopPos.offsetBy ⟨1⟩} (start.offsetBy ⟨1⟩)
+                  {trailing with startPos := trailing.startPos.unoffsetBy ⟨1⟩} (stop.unoffsetBy ⟨1⟩)
            return s.popSyntax.pushSyntax (.atom info str)
      return s

--- a/src/Lean/Elab/BuiltinCommand.lean
+++ b/src/Lean/Elab/BuiltinCommand.lean
@ -29,7 +29,7 @@ namespace Lean.Elab.Command
  match stx[1] with
  | Syntax.atom _ val =>
    if getVersoModuleDocs (← getEnv) |>.isEmpty then
-      let doc := val.extract 0 (val.endPos - ⟨2⟩)
+      let doc := val.extract 0 (val.endPos.unoffsetBy ⟨2⟩)
      modifyEnv fun env => addMainModuleDoc env ⟨doc, range⟩
    else
      throwError m!"Can't add Markdown-format module docs because there is already Verso-format content present."
--- a/src/Lean/Elab/DeclModifiers.lean
+++ b/src/Lean/Elab/DeclModifiers.lean
@ -162,7 +162,7 @@ def expandOptDocComment? [Monad m] [MonadError m] (optDocComment : Syntax) : m (
  match optDocComment.getOptional? with
  | none   => return none
  | some s => match s[1] with
-    | .atom _ val => return some (val.extract 0 (val.endPos - ⟨2⟩))
+    | .atom _ val => return some (val.extract 0 (val.endPos.unoffsetBy ⟨2⟩))
    | _           => throwErrorAt s "unexpected doc string{indentD s[1]}"

 section Methods
--- a/src/Lean/Meta/TryThis.lean
+++ b/src/Lean/Meta/TryThis.lean
@ -188,7 +188,7 @@ column `range` starts at in that line. -/
 def getIndentAndColumn (map : FileMap) (range : String.Range) : Nat × Nat :=
  let start := map.source.findLineStart range.start
  let body := map.source.findAux (· ≠ ' ') range.start start
-  ((body - start).1, (range.start - start).1)
+  (start.byteDistance body, start.byteDistance range.start)  -- both components are byte counts measured from `start`

 /--
 An option allowing the user to customize the ideal input width. Defaults to 100.
--- a/src/Lean/Parser/Basic.lean
+++ b/src/Lean/Parser/Basic.lean
@ -937,7 +937,7 @@ private def isToken (idStartPos idStopPos : String.Pos.Raw) (tk : Option Token)
  | some tk =>
     -- if a token is both a symbol and a valid identifier (i.e. a keyword),
     -- we want it to be recognized as a symbol
-    tk.endPos ≥ idStopPos - idStartPos
+    tk.utf8ByteSize ≥ idStartPos.byteDistance idStopPos


 def mkTokenAndFixPos (startPos : String.Pos.Raw) (tk : Option Token) : ParserFn := fun c s =>
--- a/src/Lean/Server/Completion/CompletionInfoSelection.lean
+++ b/src/Lean/Server/Completion/CompletionInfoSelection.lean
@ -75,7 +75,7 @@ where
    let tailPos := info.tailPos?.get!
    let hoverInfo :=
      if hoverPos < tailPos then
-        HoverInfo.inside (hoverPos - headPos).byteIdx
+        HoverInfo.inside (headPos.byteDistance hoverPos)
      else
        HoverInfo.after
    let ⟨headPosLine, _⟩ := fileMap.toPosition headPos
--- a/src/Lean/Server/Completion/SyntheticCompletion.lean
+++ b/src/Lean/Server/Completion/SyntheticCompletion.lean
@ -100,7 +100,7 @@ private def findSyntheticIdentifierCompletion?
  let tailPos := stx.getTailPos?.get!
  let hoverInfo :=
    if hoverPos < tailPos then
-      HoverInfo.inside (tailPos - hoverPos).byteIdx
+      HoverInfo.inside (hoverPos.byteDistance tailPos)
    else
      HoverInfo.after
  some { hoverInfo, ctx, info := .id stx id danglingDot info.lctx none }
@ -110,7 +110,7 @@ private partial def isCursorOnWhitespace (fileMap : FileMap) (hoverPos : String.

 private partial def isCursorInProperWhitespace (fileMap : FileMap) (hoverPos : String.Pos.Raw) : Bool :=
  (fileMap.source.atEnd hoverPos || (fileMap.source.get hoverPos).isWhitespace)
-    && (fileMap.source.get (hoverPos - ⟨1⟩)).isWhitespace
+    && (fileMap.source.get (hoverPos.unoffsetBy ⟨1⟩)).isWhitespace  -- looks up the position with byte index `hoverPos.byteIdx - 1` (`Nat` subtraction)

 private partial def isSyntheticTacticCompletion
    (fileMap  : FileMap)
--- a/src/Lean/Server/FileWorker/SignatureHelp.lean
+++ b/src/Lean/Server/FileWorker/SignatureHelp.lean
@ -105,7 +105,7 @@ private def isPositionInLineComment (text : FileMap) (pos : String.Pos.Raw) : Bo
  let line := text.source.extract lineStartPos lineEndPos
  let some lineCommentPos := lineCommentPosition? line
    | return false
-  return pos >= lineStartPos + lineCommentPos
+  return pos >= lineCommentPos.offsetBy lineStartPos

 open CandidateKind in
 def findSignatureHelp? (text : FileMap) (ctx? : Option Lsp.SignatureHelpContext) (cmdStx : Syntax)
--- a/src/Lean/Server/FileWorker/WidgetRequests.lean
+++ b/src/Lean/Server/FileWorker/WidgetRequests.lean
@ -192,7 +192,7 @@ private def contains (query text : String) : Bool :=
  ! (kmpSearch query text).isEmpty

 private def matchEndPos (query : String) (startPos : String.Pos.Raw) : String.Pos.Raw :=
-  startPos + ⟨query.utf8ByteSize⟩
+  startPos + query  -- position + string: advances `startPos` by the byte size of `query`

@[specialize]
 private def hightlightStringMatches? (query text : String) (matchPositions : Array String.Pos.Raw)
@ -208,13 +208,13 @@ private def hightlightStringMatches? (query text : String) (matchPositions : Arr
      break
    let i := mapIdx i
    let globalMatchPos := matchPositions[i]!
-    let matchPos := globalMatchPos - offset
+    let matchPos := globalMatchPos.unoffsetBy offset
    if matchPos >= text.endPos then
      break
    if let some nonMatch := nonMatch? p matchPos then
      r := r.push nonMatch
    let globalMatchEndPos := matchEndPos query globalMatchPos
-    let matchEndPos := globalMatchEndPos - offset
+    let matchEndPos := globalMatchEndPos.unoffsetBy offset
    let «match» := text.extract matchPos matchEndPos
    r := r.push <| .tag highlight (.text «match»)
    p := matchEndPos
@ -255,7 +255,7 @@ private def advanceTaggedTextHighlightState (text : String) (highlighted : α) :
 where
  updateState (text : String) (isHighlighted : Bool) : StateM TaggedTextHighlightState Unit :=
    modify fun s =>
-      let p : String.Pos.Raw := s.p + ⟨text.utf8ByteSize⟩
+      let p : String.Pos.Raw := s.p + text  -- position + string: advances `s.p` by the byte size of `text`, as `matchEndPos` does for `query`
      let ms := updateMatches s.query s.ms p
      let anyHighlight := s.anyHighlight || isHighlighted
      { s with p, ms, anyHighlight }
--- a/src/Lean/Server/InfoUtils.lean
+++ b/src/Lean/Server/InfoUtils.lean
@ -212,7 +212,7 @@ def Info.contains (i : Info) (pos : String.Pos.Raw) (includeStop := false) : Boo
 def Info.size? (i : Info) : Option String.Pos.Raw := do
  let pos ← i.pos?
  let tailPos ← i.tailPos?
-  return tailPos - pos
+  return tailPos.unoffsetBy pos  -- byte index `tailPos.byteIdx - pos.byteIdx`, still wrapped in a `String.Pos.Raw`

 -- `Info` without position information are considered to have "infinite" size
 def Info.isSmaller (i₁ i₂ : Info) : Bool :=
@ -225,7 +225,7 @@ def Info.occursInside? (i : Info) (hoverPos : String.Pos.Raw) : Option String.Po
  let headPos ← i.pos?
  let tailPos ← i.tailPos?
  guard (headPos ≤ hoverPos && hoverPos < tailPos)
-  return hoverPos - headPos
+  return hoverPos.unoffsetBy headPos

 def Info.occursInOrOnBoundary (i : Info) (hoverPos : String.Pos.Raw) : Bool := Id.run do
  let some headPos := i.pos?
@ -238,7 +238,7 @@ def InfoTree.smallestInfo? (p : Info → Bool) (t : InfoTree) : Option (ContextI
  let ts := t.deepestNodes fun ctx i _ => if p i then some (ctx, i) else none

  let infos := ts.filterMap fun (ci, i) => do
-    let diff := (← i.tailPos?) - (← i.pos?)
+    let diff := (← i.pos?).byteDistance (← i.tailPos?)
    return (diff, ci, i)

  infos.toArray.getMax? (fun a b => a.1 > b.1) |>.map fun (_, ci, i) => (ci, i)
@ -306,7 +306,7 @@ partial def InfoTree.hoverableInfoAtM? [Monad m] (t : InfoTree) (hoverPos : Stri
      return none
    let priority : HoverableInfoPrio := {
      isHoverPosOnStop := r.stop == hoverPos
-      size := (r.stop - r.start).byteIdx
+      size := r.start.byteDistance r.stop
      isVariableInfo := info matches .ofTermInfo { expr := .fvar .., .. }
      isPartialTermInfo := info matches .ofPartialTermInfo ..
    }
--- a/src/Lean/Syntax.lean
+++ b/src/Lean/Syntax.lean
@ -247,7 +247,7 @@ private def updateInfo : SourceInfo → String.Pos.Raw → String.Pos.Raw → So
  | info, _, _ => info

 private def chooseNiceTrailStop (trail : Substring) : String.Pos.Raw :=
-trail.startPos + trail.posOf '\n'
+  (trail.posOf '\n').offsetBy trail.startPos  -- the result of `trail.posOf '\n'` shifted by `trail.startPos`

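-/-- Remark: the State `String.Pos` is the `SourceInfo.trailing.stopPos` of the previous token,
+/-- Remark: the State `String.Pos.Raw` is the `SourceInfo.trailing.stopPos` of the previous token,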
   or the beginning of the String. -/
@ -318,10 +318,10 @@ def identComponents (stx : Syntax) (nFields? : Option Nat := none) : List Syntax
          rawComps
      if nameComps.length == rawComps.length then
        return nameComps.zip rawComps |>.map fun (id, ss) =>
-          let off := ss.startPos - rawStr.startPos
+          let off := ss.startPos.unoffsetBy rawStr.startPos
          let lead := if off == 0 then lead else "".toSubstring
          let trail := if ss.stopPos == rawStr.stopPos then trail else "".toSubstring
-          let info := original lead (pos + off) trail (pos + off + ⟨ss.bsize⟩)
+          let info := original lead (pos.offsetBy off) trail (pos.offsetBy off |>.offsetBy ⟨ss.bsize⟩)
          ident info ss id []
    -- if re-parsing failed, just give them all the same span
    nameComps.map fun n => ident si n.toString.toSubstring n []
--- a/tests/compiler/str.lean
+++ b/tests/compiler/str.lean
@ -1,4 +1,4 @@
-def showChars : Nat → String → String.Pos → IO Unit
+def showChars : Nat → String → String.Pos.Raw → IO Unit
 | 0,     _, _   => pure ()
 | n+1,   s, idx => do
  unless s.atEnd idx  do
@ -7,14 +7,14 @@ def showChars : Nat → String → String.Pos → IO Unit

 def main : IO UInt32 :=
 let s₁             := "hello α_world_β";
-let b : String.Pos := 0;
+let b : String.Pos.Raw := 0;
 let e              := s₁.endPos;
 IO.println (s₁.extract b e) *>
 IO.println (s₁.extract (b+ "  ") e) *>
-IO.println (s₁.extract (b+ "  ") (e-⟨1⟩)) *>
-IO.println (s₁.extract (b+⟨2⟩) (e-⟨2⟩)) *>
-IO.println (s₁.extract (b+⟨7⟩) e) *>
-IO.println (s₁.extract (b+⟨8⟩) e) *>
+IO.println (s₁.extract (b+ "  ") (e.unoffsetBy ⟨1⟩)) *>
+IO.println (s₁.extract (b.offsetBy ⟨2⟩) (e.unoffsetBy ⟨2⟩)) *>
+IO.println (s₁.extract (b.offsetBy ⟨7⟩) e) *>
+IO.println (s₁.extract (b.offsetBy ⟨8⟩) e) *>
 IO.println (toString e) *>
 IO.println (repr "   aaa   ".trim) *>
 showChars s₁.length s₁ 0  *>
--- a/tests/lean/run/frontend_meeting_2022_09_13.lean
+++ b/tests/lean/run/frontend_meeting_2022_09_13.lean
@ -60,7 +60,7 @@ end
 open Lean Elab Command in
@[command_elab commandComment] def elabCommandComment : CommandElab := fun stx => do
   let .atom _ val := stx[1] | return ()
-   let str := val.extract 0 (val.endPos - ⟨3⟩)
+   let str := val.extract 0 (val.endPos.unoffsetBy ⟨3⟩)  -- stops 3 bytes before the end of `val` (presumably the closing `-//` — TODO confirm)
   IO.println s!"str := {repr str}"

 //- My command comment hello world -//