diff --git a/src/Init/Core.lean b/src/Init/Core.lean
index abeca11449..2d9fbff8ee 100644
--- a/src/Init/Core.lean
+++ b/src/Init/Core.lean
@@ -1580,6 +1580,7 @@ instance {p q : Prop} [d : Decidable (p ↔ q)] : Decidable (p = q) :=
 
 gen_injective_theorems% Array
 gen_injective_theorems% BitVec
+gen_injective_theorems% ByteArray
 gen_injective_theorems% Char
 gen_injective_theorems% DoResultBC
 gen_injective_theorems% DoResultPR
diff --git a/src/Init/Data/BitVec/Lemmas.lean b/src/Init/Data/BitVec/Lemmas.lean
index 11898eea0a..eaeecfa675 100644
--- a/src/Init/Data/BitVec/Lemmas.lean
+++ b/src/Init/Data/BitVec/Lemmas.lean
@@ -1100,6 +1100,10 @@ theorem toInt_setWidth' {m n : Nat} (p : m ≤ n) {x : BitVec m} :
   rw [setWidth'_eq, toFin_setWidth, Fin.val_ofNat, Fin.coe_castLE, val_toFin,
     Nat.mod_eq_of_lt (by apply BitVec.toNat_lt_twoPow_of_le p)]
 
+theorem toNat_setWidth_of_le {w w' : Nat} {b : BitVec w} (h : w ≤ w') : (b.setWidth w').toNat = b.toNat := by
+  rw [BitVec.toNat_setWidth, Nat.mod_eq_of_lt]
+  exact BitVec.toNat_lt_twoPow_of_le h
+
 /-! ## extractLsb -/
 
 @[simp, grind =]
@@ -1287,6 +1291,17 @@ theorem extractLsb'_eq_zero {x : BitVec w} {start : Nat} :
   ext i hi
   omega
 
+theorem extractLsb'_setWidth_of_le {b : BitVec w} {start len w' : Nat} (h : start + len ≤ w') :
+    (b.setWidth w').extractLsb' start len = b.extractLsb' start len := by
+  ext i h_i
+  simp
+  omega
+
+theorem setWidth_extractLsb'_of_le {c : BitVec w} (h : len₁ ≤ len₂) :
+    (c.extractLsb' start len₂).setWidth len₁ = c.extractLsb' start len₁ := by
+  ext i hi
+  simp [show i < len₂ by omega]
+
 /-! ### allOnes -/
 
 @[simp, grind =] theorem toNat_allOnes : (allOnes v).toNat = 2^v - 1 := by
@@ -1530,6 +1545,12 @@ theorem extractLsb_and {x : BitVec w} {hi lo : Nat} :
 @[simp, grind =] theorem ofNat_and {x y : Nat} : BitVec.ofNat w (x &&& y) = BitVec.ofNat w x &&& BitVec.ofNat w y :=
   eq_of_toNat_eq (by simp [Nat.and_mod_two_pow])
 
+theorem and_or_distrib_left {x y z : BitVec w} : x &&& (y ||| z) = (x &&& y) ||| (x &&& z) :=
+  BitVec.eq_of_getElem_eq (by simp [Bool.and_or_distrib_left])
+
+theorem and_or_distrib_right {x y z : BitVec w} : (x ||| y) &&& z = (x &&& z) ||| (y &&& z) :=
+  BitVec.eq_of_getElem_eq (by simp [Bool.and_or_distrib_right])
+
 /-! ### xor -/
 
 @[simp, grind =] theorem toNat_xor (x y : BitVec v) :
@@ -2180,6 +2201,10 @@ theorem msb_ushiftRight {x : BitVec w} {n : Nat} :
   have := lt_of_getLsbD ha
   omega
 
+theorem setWidth_ushiftRight_eq_extractLsb {b : BitVec w} : (b >>> w').setWidth w'' = b.extractLsb' w' w'' := by
+  ext i hi
+  simp
+
 /-! ### ushiftRight reductions from BitVec to Nat -/
 
 @[simp, grind =]
@@ -2970,10 +2995,9 @@ theorem shiftLeft_eq_concat_of_lt {x : BitVec w} {n : Nat} (hn : n < w) :
 /-- Combine adjacent `extractLsb'` operations into a single `extractLsb'`. -/
 theorem extractLsb'_append_extractLsb'_eq_extractLsb' {x : BitVec w} (h : start₂ = start₁ + len₁) :
     ((x.extractLsb' start₂ len₂) ++ (x.extractLsb' start₁ len₁)) =
-    (x.extractLsb' start₁ (len₁ + len₂)).cast (by omega) := by
+      x.extractLsb' start₁ (len₂ + len₁) := by
   ext i h
-  simp only [getElem_append, getElem_extractLsb', dite_eq_ite, getElem_cast, ite_eq_left_iff,
-    Nat.not_lt]
+  simp only [getElem_append, getElem_extractLsb', dite_eq_ite, ite_eq_left_iff, Nat.not_lt]
   intro hi
   congr 1
   omega
@@ -3085,6 +3109,51 @@ theorem extractLsb'_append_eq_of_le {v w} {xhi : BitVec v} {xlo : BitVec w}
     extractLsb' start len (xhi ++ xlo) = extractLsb' (start - w) len xhi := by
   simp [extractLsb'_append_eq_ite, show ¬ start < w by omega]
 
+theorem extractLsb'_append_eq_left {a : BitVec w} {b : BitVec w'} : (a ++ b).extractLsb' w' w = a := by
+  simp [BitVec.extractLsb'_append_eq_of_le]
+
+theorem extractLsb'_append_eq_right {a : BitVec w} {b : BitVec w'} : (a ++ b).extractLsb' 0 w' = b := by
+  simp [BitVec.extractLsb'_append_eq_of_add_le]
+
+theorem setWidth_append_eq_right {a : BitVec w} {b : BitVec w'} : (a ++ b).setWidth w' = b := by
+  ext i hi
+  simp [getLsbD_append, hi]
+
+theorem append_left_inj {s₁ s₂ : BitVec w} (t : BitVec w') : s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by
+  refine ⟨fun h => ?_, fun h => h ▸ rfl⟩
+  ext i hi
+  simpa [getElem_append, dif_neg] using congrArg (·[i + w']'(by omega)) h
+
+theorem append_right_inj (s : BitVec w) {t₁ t₂ : BitVec w'} : s ++ t₁ = s ++ t₂ ↔ t₁ = t₂ := by
+  refine ⟨fun h => ?_, fun h => h ▸ rfl⟩
+  ext i hi
+  simpa [getElem_append, hi] using congrArg (·[i]) h
+
+theorem setWidth_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} :
+    (b ++ b').setWidth w'' = (b.setWidth w'' <<< w') ||| b'.setWidth w'' := by
+  ext i hi
+  simp only [getElem_setWidth, getElem_or, getElem_shiftLeft]
+  rw [getLsbD_append]
+  split <;> simp_all
+
+theorem setWidth_append_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} {b'' : BitVec w''} :
+    (b ++ b' ++ b'').setWidth w''' = (b.setWidth w''' <<< (w' + w'')) ||| (b'.setWidth w''' <<< w'') ||| b''.setWidth w''' := by
+  rw [BitVec.setWidth_append_eq_shiftLeft_setWidth_or,
+    BitVec.setWidth_append_eq_shiftLeft_setWidth_or,
+    BitVec.shiftLeft_or_distrib, BitVec.shiftLeft_add]
+
+theorem setWidth_append_append_append_eq_shiftLeft_setWidth_or {b : BitVec w} {b' : BitVec w'} {b'' : BitVec w''} {b''' : BitVec w'''} :
+    (b ++ b' ++ b'' ++ b''').setWidth w'''' = (b.setWidth w'''' <<< (w' + w'' + w''')) ||| (b'.setWidth w'''' <<< (w'' + w''')) |||
+      (b''.setWidth w'''' <<< w''') ||| b'''.setWidth w'''' := by
+  simp only [BitVec.setWidth_append_eq_shiftLeft_setWidth_or, BitVec.shiftLeft_or_distrib, BitVec.shiftLeft_add]
+
+theorem and_setWidth_allOnes (w' w : Nat) (b : BitVec (w' + w)) :
+    b &&& (BitVec.allOnes w).setWidth (w' + w) = 0#w' ++ b.setWidth w := by
+  ext i hi
+  simp only [getElem_and, getElem_setWidth, getLsbD_allOnes]
+  rw [BitVec.getElem_append]
+  split <;> simp_all
+
 /-! ### rev -/
 
 @[grind =]
@@ -4041,6 +4110,9 @@ instance instLawfulOrderLT : LawfulOrderLT (BitVec n) := by
   apply LawfulOrderLT.of_le
   simpa using fun _ _ => BitVec.lt_asymm
 
+theorem length_pos_of_lt {b b' : BitVec w} (h : b < b') : 0 < w :=
+  length_pos_of_ne (BitVec.ne_of_lt h)
+
 protected theorem umod_lt (x : BitVec n) {y : BitVec n} : 0 < y → x % y < y := by
   simp only [ofNat_eq_ofNat, lt_def, toNat_ofNat, Nat.zero_mod]
   apply Nat.mod_lt
@@ -4112,6 +4184,14 @@ theorem lt_of_msb_false_of_msb_true {x y : BitVec w} (hx : x.msb = false) (hy :
   simp
   omega
 
+theorem lt_add_one {b : BitVec w} (h : b ≠ allOnes w) : b < b + 1 := by
+  simp only [ne_eq, ← toNat_inj, toNat_allOnes] at h
+  simp only [BitVec.lt_def, ofNat_eq_ofNat, toNat_add, toNat_ofNat, Nat.add_mod_mod]
+  rw [Nat.mod_eq_of_lt]
+  · exact Nat.lt_add_one _
+  · have := b.toNat_lt_twoPow_of_le (Nat.le_refl _)
+    omega
+
 /-! ### udiv -/
 
 theorem udiv_def {x y : BitVec n} : x / y = BitVec.ofNat n (x.toNat / y.toNat) := by
@@ -5257,7 +5337,7 @@ theorem replicate_succ' {x : BitVec w} :
     (replicate n x ++ x).cast (by rw [Nat.mul_succ]) := by
   simp [replicate_append_self]
 
-theorem BitVec.setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y) = (BitVec.setWidth i x + BitVec.setWidth i y) % (BitVec.twoPow i w) := by
+theorem setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y) = (BitVec.setWidth i x + BitVec.setWidth i y) % (BitVec.twoPow i w) := by
   apply BitVec.eq_of_toNat_eq
   rw [toNat_setWidth]
   simp only [toNat_setWidth, toNat_add, toNat_umod, Nat.add_mod_mod, Nat.mod_add_mod, toNat_twoPow]
@@ -5266,6 +5346,14 @@ theorem BitVec.setWidth_add_eq_mod {x y : BitVec w} : BitVec.setWidth i (x + y)
   · have hk : 2 ^ w < 2 ^ i := Nat.pow_lt_pow_of_lt (by decide) (Nat.lt_of_not_le h)
     rw [Nat.mod_eq_of_lt hk, Nat.mod_mod_eq_mod_mod_of_dvd (Nat.pow_dvd_pow _ (Nat.le_of_not_le h))]
 
+theorem setWidth_setWidth_eq_self {a : BitVec w} {w' : Nat} (h : a < BitVec.twoPow w w') : (a.setWidth w').setWidth w = a := by
+  by_cases hw : w' < w
+  · simp only [toNat_eq, toNat_setWidth]
+    rw [Nat.mod_mod_of_dvd' (Nat.pow_dvd_pow _ (Nat.le_of_lt hw)), Nat.mod_eq_of_lt]
+    rwa [BitVec.lt_def, BitVec.toNat_twoPow_of_lt hw] at h
+  · rw [BitVec.lt_def, BitVec.toNat_twoPow_of_le (by omega)] at h
+    simp at h
+
 /-! ### intMin -/
 
 @[grind =]
@@ -5928,6 +6016,12 @@ theorem getElem_eq_true_of_lt_of_le {x : BitVec w} (hk' : k < w) (hlt: x.toNat <
       omega
   · simp [show w ≤ k + k' by omega] at hk'
 
+theorem not_lt_iff {b : BitVec w} : ~~~b < b ↔ 0 < w ∧ b.msb = true := by
+  refine ⟨fun h => ?_, fun ⟨hw, hb⟩ => ?_⟩
+  · have := length_pos_of_lt h
+    exact ⟨this, by rwa [← ult_iff_lt, ult_eq_msb_of_msb_neq (by simp_all)] at h⟩
+  · rwa [← ult_iff_lt, ult_eq_msb_of_msb_neq (by simp_all)]
+
 /-! ### Count leading zeros -/
 
 theorem clzAuxRec_zero (x : BitVec w) :
diff --git a/src/Init/Data/ByteArray.lean b/src/Init/Data/ByteArray.lean
index 4f1cf8249b..a9a32612cd 100644
--- a/src/Init/Data/ByteArray.lean
+++ b/src/Init/Data/ByteArray.lean
@@ -7,6 +7,8 @@ module
 
 prelude
 public import Init.Data.ByteArray.Basic
+public import Init.Data.ByteArray.Bootstrap
 public import Init.Data.ByteArray.Extra
+public import Init.Data.ByteArray.Lemmas
 
 public section
diff --git a/src/Init/Data/ByteArray/Basic.lean b/src/Init/Data/ByteArray/Basic.lean
index 41654438b8..0faadabd3e 100644
--- a/src/Init/Data/ByteArray/Basic.lean
+++ b/src/Init/Data/ByteArray/Basic.lean
@@ -11,16 +11,11 @@ public import Init.Data.UInt.Basic
 public import Init.Data.UInt.BasicAux
 import all Init.Data.UInt.BasicAux
 public import Init.Data.Option.Basic
+public import Init.Data.Array.Extract
 
 @[expose] public section
 universe u
 
-structure ByteArray where
-  data : Array UInt8
-
-attribute [extern "lean_byte_array_mk"] ByteArray.mk
-attribute [extern "lean_byte_array_data"] ByteArray.data
-
 namespace ByteArray
 
 deriving instance BEq for ByteArray
@@ -30,29 +25,15 @@ attribute [ext] ByteArray
 instance : DecidableEq ByteArray :=
   fun _ _ => decidable_of_decidable_of_iff ByteArray.ext_iff.symm
 
-@[extern "lean_mk_empty_byte_array"]
-def emptyWithCapacity (c : @& Nat) : ByteArray :=
-  { data := #[] }
-
 @[deprecated emptyWithCapacity (since := "2025-03-12")]
 abbrev mkEmpty := emptyWithCapacity
 
-def empty : ByteArray := emptyWithCapacity 0
-
 instance : Inhabited ByteArray where
   default := empty
 
 instance : EmptyCollection ByteArray where
   emptyCollection := ByteArray.empty
 
-@[extern "lean_byte_array_push"]
-def push : ByteArray → UInt8 → ByteArray
-  | ⟨bs⟩, b => ⟨bs.push b⟩
-
-@[extern "lean_byte_array_size"]
-def size : (@& ByteArray) → Nat
-  | ⟨bs⟩ => bs.size
-
 @[extern "lean_sarray_size", simp]
 def usize (a : @& ByteArray) : USize :=
   a.size.toUSize
@@ -106,11 +87,31 @@ def copySlice (src : @& ByteArray) (srcOff : Nat) (dest : ByteArray) (destOff le
 def extract (a : ByteArray) (b e : Nat) : ByteArray :=
   a.copySlice b empty 0 (e - b)
 
-protected def append (a : ByteArray) (b : ByteArray) : ByteArray :=
+protected def fastAppend (a : ByteArray) (b : ByteArray) : ByteArray :=
   -- we assume that `append`s may be repeated, so use asymptotic growing; use `copySlice` directly to customize
   b.copySlice 0 a a.size b.size false
 
-instance : Append ByteArray := ⟨ByteArray.append⟩
+@[simp]
+theorem size_data {a : ByteArray} :
+  a.data.size = a.size := rfl
+
+@[csimp]
+theorem append_eq_fastAppend : @ByteArray.append = @ByteArray.fastAppend := by
+  funext a b
+  ext1
+  apply Array.ext'
+  simp [ByteArray.fastAppend, copySlice, ← size_data, - Array.append_assoc]
+
+-- Needs to come after the `csimp` lemma
+instance : Append ByteArray where
+  append := ByteArray.append
+
+@[simp]
+theorem append_eq {a b : ByteArray} : a.append b = a ++ b := rfl
+
+@[simp]
+theorem fastAppend_eq {a b : ByteArray} : a.fastAppend b = a ++ b := by
+  simp [← append_eq_fastAppend]
 
 def toList (bs : ByteArray) : List UInt8 :=
   let rec loop (i : Nat) (r : List UInt8) :=
@@ -350,13 +351,4 @@ def prevn : Iterator → Nat → Iterator
 end Iterator
 end ByteArray
 
-/--
-Converts a list of bytes into a `ByteArray`.
--/
-def List.toByteArray (bs : List UInt8) : ByteArray :=
-  let rec loop
-    | [],    r => r
-    | b::bs, r => loop bs (r.push b)
-  loop bs ByteArray.empty
-
 instance : ToString ByteArray := ⟨fun bs => bs.toList.toString⟩
diff --git a/src/Init/Data/ByteArray/Bootstrap.lean b/src/Init/Data/ByteArray/Bootstrap.lean
new file mode 100644
index 0000000000..4e9a5df2ea
--- /dev/null
+++ b/src/Init/Data/ByteArray/Bootstrap.lean
@@ -0,0 +1,53 @@
+/-
+Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Author: Markus Himmel
+-/
+module
+
+prelude
+public import Init.Prelude
+public import Init.Data.List.Basic
+
+public section
+
+namespace ByteArray
+
+@[simp]
+theorem data_push {a : ByteArray} {b : UInt8} : (a.push b).data = a.data.push b := rfl
+
+@[expose]
+protected def append (a b : ByteArray) : ByteArray :=
+  ⟨⟨a.data.toList ++ b.data.toList⟩⟩
+
+@[simp]
+theorem toList_data_append' {a b : ByteArray} :
+    (a.append b).data.toList = a.data.toList ++ b.data.toList := by
+  have ⟨⟨a⟩⟩ := a
+  have ⟨⟨b⟩⟩ := b
+  rfl
+
+theorem ext : {x y : ByteArray} → x.data = y.data → x = y
+  | ⟨_⟩, ⟨_⟩, rfl => rfl
+
+end ByteArray
+
+@[simp]
+theorem List.toList_data_toByteArray {l : List UInt8} :
+    l.toByteArray.data.toList = l := by
+  rw [List.toByteArray]
+  suffices ∀ a b, (List.toByteArray.loop a b).data.toList = b.data.toList ++ a by
+    simpa using this l ByteArray.empty
+  intro a b
+  fun_induction List.toByteArray.loop a b with simp_all [toList_push]
+where
+  toList_push {xs : Array UInt8} {x : UInt8} : (xs.push x).toList = xs.toList ++ [x] := by
+    have ⟨xs⟩ := xs
+    simp [Array.push, List.concat_eq_append]
+
+theorem List.toByteArray_append' {l l' : List UInt8} :
+    (l ++ l').toByteArray = l.toByteArray.append l'.toByteArray :=
+  ByteArray.ext (ext (by simp))
+where
+  ext : {x y : Array UInt8} → x.toList = y.toList → x = y
+  | ⟨_⟩, ⟨_⟩, rfl => rfl
diff --git a/src/Init/Data/ByteArray/Lemmas.lean b/src/Init/Data/ByteArray/Lemmas.lean
new file mode 100644
index 0000000000..4026d99b58
--- /dev/null
+++ b/src/Init/Data/ByteArray/Lemmas.lean
@@ -0,0 +1,252 @@
+/-
+Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Author: Markus Himmel
+-/
+module
+
+prelude
+public import Init.Data.ByteArray.Basic
+public import Init.Data.Array.Extract
+
+public section
+
+@[simp]
+theorem ByteArray.data_empty : ByteArray.empty.data = #[] := rfl
+
+@[simp]
+theorem ByteArray.data_extract {a : ByteArray} {b e : Nat} :
+    (a.extract b e).data = a.data.extract b e := by
+  simp [extract, copySlice]
+  by_cases b ≤ e
+  · rw [(by omega : b + (e - b) = e)]
+  · rw [Array.extract_eq_empty_of_le (by omega), Array.extract_eq_empty_of_le (by omega)]
+
+@[simp]
+theorem ByteArray.extract_zero_size {b : ByteArray} : b.extract 0 b.size = b := by
+  ext1
+  simp
+
+@[simp]
+theorem ByteArray.extract_same {b : ByteArray} {i : Nat} : b.extract i i = ByteArray.empty := by
+  ext1
+  simp [Nat.min_le_left]
+
+theorem ByteArray.fastAppend_eq_copySlice {a b : ByteArray} :
+  a.fastAppend b = b.copySlice 0 a a.size b.size false := rfl
+
+@[simp]
+theorem List.toByteArray_append {l l' : List UInt8} :
+    (l ++ l').toByteArray = l.toByteArray ++ l'.toByteArray := by
+  simp [List.toByteArray_append']
+
+@[simp]
+theorem ByteArray.toList_data_append {l l' : ByteArray} :
+    (l ++ l').data.toList = l.data.toList ++ l'.data.toList := by
+  simp [← append_eq]
+
+@[simp]
+theorem ByteArray.data_append {l l' : ByteArray} :
+    (l ++ l').data = l.data ++ l'.data := by
+  simp [← Array.toList_inj]
+
+@[simp]
+theorem ByteArray.size_empty : ByteArray.empty.size = 0 := by
+  simp [← ByteArray.size_data]
+
+@[simp]
+theorem List.data_toByteArray {l : List UInt8} :
+    l.toByteArray.data = l.toArray := by
+  rw [List.toByteArray]
+  suffices ∀ a b, (List.toByteArray.loop a b).data = b.data ++ a.toArray by
+    simpa using this l ByteArray.empty
+  intro a b
+  fun_induction List.toByteArray.loop a b with simp_all
+
+@[simp]
+theorem List.size_toByteArray {l : List UInt8} :
+    l.toByteArray.size = l.length := by
+  simp [← ByteArray.size_data]
+
+@[simp]
+theorem List.toByteArray_nil : List.toByteArray [] = ByteArray.empty := rfl
+
+@[simp]
+theorem ByteArray.empty_append {b : ByteArray} : ByteArray.empty ++ b = b := by
+  ext1
+  simp
+
+@[simp]
+theorem ByteArray.append_empty {b : ByteArray} : b ++ ByteArray.empty = b := by
+  ext1
+  simp
+
+@[simp, grind]
+theorem ByteArray.size_append {a b : ByteArray} : (a ++ b).size = a.size + b.size := by
+  simp [← size_data]
+
+@[simp]
+theorem ByteArray.size_eq_zero_iff {a : ByteArray} : a.size = 0 ↔ a = ByteArray.empty := by
+  refine ⟨fun h => ?_, fun h => h ▸ ByteArray.size_empty⟩
+  ext1
+  simp [← Array.size_eq_zero_iff, h]
+
+theorem ByteArray.getElem_eq_getElem_data {a : ByteArray} {i : Nat} {h : i < a.size} :
+    a[i] = a.data[i]'(by simpa [← size_data]) := rfl
+
+@[simp]
+theorem ByteArray.getElem_append_left {i : Nat} {a b : ByteArray} {h : i < (a ++ b).size}
+    (hlt : i < a.size) : (a ++ b)[i] = a[i] := by
+  simp only [getElem_eq_getElem_data, data_append]
+  rw [Array.getElem_append_left (by simpa)]
+
+theorem ByteArray.getElem_append_right {i : Nat} {a b : ByteArray} {h : i < (a ++ b).size}
+    (hle : a.size ≤ i) : (a ++ b)[i] = b[i - a.size]'(by simp_all; omega) := by
+  simp only [getElem_eq_getElem_data, data_append]
+  rw [Array.getElem_append_right (by simpa)]
+  simp
+
+@[simp]
+theorem List.getElem_toByteArray {l : List UInt8} {i : Nat} {h : i < l.toByteArray.size} :
+    l.toByteArray[i]'h = l[i]'(by simp_all) := by
+  simp [ByteArray.getElem_eq_getElem_data]
+
+theorem List.getElem_eq_getElem_toByteArray {l : List UInt8} {i : Nat} {h : i < l.length} :
+    l[i]'h = l.toByteArray[i]'(by simp_all) := by
+  simp
+
+@[simp]
+theorem ByteArray.size_extract {a : ByteArray} {b e : Nat} :
+    (a.extract b e).size = min e a.size - b := by
+  simp [← size_data]
+
+@[simp]
+theorem ByteArray.extract_eq_empty_iff {b : ByteArray} {i j : Nat} : b.extract i j = ByteArray.empty ↔ min j b.size ≤ i := by
+  rw [← size_eq_zero_iff, size_extract]
+  omega
+
+@[simp]
+theorem ByteArray.extract_add_left {b : ByteArray} {i j : Nat} : b.extract (i + j) i = ByteArray.empty := by
+  simp only [extract_eq_empty_iff]
+  exact Nat.le_trans (Nat.min_le_left _ _) (by simp)
+
+@[simp]
+theorem ByteArray.append_eq_empty_iff {a b : ByteArray} :
+    a ++ b = ByteArray.empty ↔ a = ByteArray.empty ∧ b = ByteArray.empty := by
+  simp [← size_eq_zero_iff, size_append]
+
+@[simp]
+theorem List.toByteArray_eq_empty {l : List UInt8} :
+    l.toByteArray = ByteArray.empty ↔ l = [] := by
+  simp [← ByteArray.size_eq_zero_iff]
+
+theorem ByteArray.append_right_inj {ys₁ ys₂ : ByteArray} (xs : ByteArray) :
+    xs ++ ys₁ = xs ++ ys₂ ↔ ys₁ = ys₂ := by
+  simp [ByteArray.ext_iff, Array.append_right_inj]
+
+@[simp]
+theorem ByteArray.extract_append_extract {a : ByteArray} {i j k : Nat} :
+    a.extract i j ++ a.extract j k = a.extract (min i j) (max j k) := by
+  ext1
+  simp
+
+theorem ByteArray.extract_eq_extract_append_extract {a : ByteArray} {i k : Nat} (j : Nat)
+    (hi : i ≤ j) (hk : j ≤ k) :
+    a.extract i k = a.extract i j ++ a.extract j k := by
+  simp
+  rw [Nat.min_eq_left hi, Nat.max_eq_right hk]
+
+theorem ByteArray.append_inj_left {xs₁ xs₂ ys₁ ys₂ : ByteArray} (h : xs₁ ++ ys₁ = xs₂ ++ ys₂) (hl : xs₁.size = xs₂.size) : xs₁ = xs₂ := by
+  simp only [ByteArray.ext_iff, ← ByteArray.size_data, ByteArray.data_append] at *
+  exact Array.append_inj_left h hl
+
+theorem ByteArray.extract_append_eq_right {a b : ByteArray} {i : Nat} (hi : i = a.size) :
+    (a ++ b).extract i (a ++ b).size = b := by
+  subst hi
+  ext1
+  simp [← size_data]
+
+theorem ByteArray.extract_append_eq_left {a b : ByteArray} {i : Nat} (hi : i = a.size) :
+    (a ++ b).extract 0 i = a := by
+  subst hi
+  ext1
+  simp
+
+theorem ByteArray.extract_append_size_left {a b : ByteArray} {i : Nat} :
+    (a ++ b).extract i a.size = a.extract i a.size := by
+  ext1
+  simp
+
+theorem ByteArray.extract_append_size_add {a b : ByteArray} {i j : Nat} :
+    (a ++ b).extract (a.size + i) (a.size + j) = b.extract i j := by
+  ext1
+  simp
+
+theorem ByteArray.extract_append  {as bs : ByteArray} {i j : Nat} :
+    (as ++ bs).extract i j = as.extract i j ++ bs.extract (i - as.size) (j - as.size) := by
+  ext1
+  simp
+
+theorem ByteArray.extract_append_size_add' {a b : ByteArray} {i j k : Nat} (h : k = a.size) :
+    (a ++ b).extract (k + i) (k + j) = b.extract i j := by
+  cases h
+  rw [extract_append_size_add]
+
+theorem ByteArray.extract_extract {a : ByteArray} {i j k l : Nat} :
+    (a.extract i j).extract k l = a.extract (i + k) (min (i + l) j) := by
+  ext1
+  simp
+
+theorem ByteArray.getElem_extract_aux {xs : ByteArray} {start stop : Nat} (h : i < (xs.extract start stop).size) :
+    start + i < xs.size := by
+  rw [size_extract] at h; apply Nat.add_lt_of_lt_sub'; apply Nat.lt_of_lt_of_le h
+  apply Nat.sub_le_sub_right; apply Nat.min_le_right
+
+theorem ByteArray.getElem_extract {i : Nat} {b : ByteArray} {start stop : Nat}
+    (h) : (b.extract start stop)[i]'h = b[start + i]'(getElem_extract_aux h) := by
+  simp [getElem_eq_getElem_data]
+
+theorem ByteArray.extract_eq_extract_left {a : ByteArray} {i i' j : Nat} :
+    a.extract i j = a.extract i' j ↔ min j a.size - i = min j a.size - i' := by
+  simp [ByteArray.ext_iff, Array.extract_eq_extract_left]
+
+theorem ByteArray.extract_add_one {a : ByteArray} {i : Nat} (ha : i + 1 ≤ a.size) :
+    a.extract i (i + 1) = [a[i]].toByteArray := by
+  ext
+  · simp
+    omega
+  · rename_i j hj hj'
+    obtain rfl : j = 0 := by simpa using hj'
+    simp [ByteArray.getElem_eq_getElem_data]
+
+theorem ByteArray.extract_add_two {a : ByteArray} {i : Nat} (ha : i + 2 ≤ a.size) :
+    a.extract i (i + 2) = [a[i], a[i + 1]].toByteArray := by
+  rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
+    extract_add_one (by omega), extract_add_one (by omega)]
+  simp [← List.toByteArray_append]
+
+theorem ByteArray.extract_add_three {a : ByteArray} {i : Nat} (ha : i + 3 ≤ a.size) :
+    a.extract i (i + 3) = [a[i], a[i + 1], a[i + 2]].toByteArray := by
+  rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
+    extract_add_one (by omega), extract_add_two (by omega)]
+  simp [← List.toByteArray_append]
+
+theorem ByteArray.extract_add_four {a : ByteArray} {i : Nat} (ha : i + 4 ≤ a.size) :
+    a.extract i (i + 4) = [a[i], a[i + 1], a[i + 2], a[i + 3]].toByteArray := by
+  rw [extract_eq_extract_append_extract (i + 1) (by simp) (by omega),
+    extract_add_one (by omega), extract_add_three (by omega)]
+  simp [← List.toByteArray_append]
+
+theorem ByteArray.append_assoc {a b c : ByteArray} : a ++ b ++ c = a ++ (b ++ c) := by
+  ext1
+  simp
+
+@[simp]
+theorem ByteArray.toList_empty : ByteArray.empty.toList = [] := by
+  simp [ByteArray.toList, ByteArray.toList.loop]
+
+theorem ByteArray.copySlice_eq_append {src : ByteArray} {srcOff : Nat} {dest : ByteArray} {destOff len : Nat} {exact : Bool} :
+    ByteArray.copySlice src srcOff dest destOff len exact =
+      dest.extract 0 destOff ++ src.extract srcOff (srcOff +len) ++ dest.extract (destOff + min len (src.data.size - srcOff)) dest.data.size := by
+  ext1
+  simp [copySlice]
diff --git a/src/Init/Data/List/Basic.lean b/src/Init/Data/List/Basic.lean
index a8631f9bee..f26a258425 100644
--- a/src/Init/Data/List/Basic.lean
+++ b/src/Init/Data/List/Basic.lean
@@ -475,21 +475,6 @@ We define the basic functional programming operations on `List`:
 
 /-! ### map -/
 
-/--
-Applies a function to each element of the list, returning the resulting list of values.
-
-`O(|l|)`.
-
-Examples:
-* `[a, b, c].map f = [f a, f b, f c]`
-* `[].map Nat.succ = []`
-* `["one", "two", "three"].map (·.length) = [3, 3, 5]`
-* `["one", "two", "three"].map (·.reverse) = ["eno", "owt", "eerht"]`
--/
-@[specialize] def map (f : α → β) : (l : List α) → List β
-  | []    => []
-  | a::as => f a :: map f as
-
 @[simp, grind =] theorem map_nil {f : α → β} : map f [] = [] := rfl
 @[simp, grind =] theorem map_cons {f : α → β} {a : α} {l : List α} : map f (a :: l) = f a :: map f l := rfl
 
@@ -606,20 +591,6 @@ Appends two lists. Normally used via the `++` operator.
 
 Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
 
-Examples:
-  * `[1, 2, 3] ++ [4, 5] = [1, 2, 3, 4, 5]`.
-  * `[] ++ [4, 5] = [4, 5]`.
-  * `[1, 2, 3] ++ [] = [1, 2, 3]`.
--/
-protected def append : (xs ys : List α) → List α
-  | [],    bs => bs
-  | a::as, bs => a :: List.append as bs
-
-/--
-Appends two lists. Normally used via the `++` operator.
-
-Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
-
 This is a tail-recursive version of `List.append`.
 
 Examples:
@@ -691,18 +662,6 @@ theorem reverseAux_eq_append {as bs : List α} : reverseAux as bs = reverseAux a
 
 /-! ### flatten -/
 
-/--
-Concatenates a list of lists into a single list, preserving the order of the elements.
-
-`O(|flatten L|)`.
-
-Examples:
-* `[["a"], ["b", "c"]].flatten = ["a", "b", "c"]`
-* `[["a"], [], ["b", "c"], ["d", "e", "f"]].flatten = ["a", "b", "c", "d", "e", "f"]`
--/
-def flatten : List (List α) → List α
-  | []      => []
-  | l :: L => l ++ flatten L
 
 @[simp, grind =] theorem flatten_nil : List.flatten ([] : List (List α)) = [] := rfl
 @[simp, grind =] theorem flatten_cons : (l :: L).flatten = l ++ L.flatten := rfl
@@ -721,20 +680,14 @@ Examples:
 
 /-! ### flatMap -/
 
-/--
-Applies a function that returns a list to each element of a list, and concatenates the resulting
-lists.
-
-Examples:
-* `[2, 3, 2].flatMap List.range = [0, 1, 0, 1, 2, 0, 1]`
-* `["red", "blue"].flatMap String.toList = ['r', 'e', 'd', 'b', 'l', 'u', 'e']`
--/
-@[inline] def flatMap {α : Type u} {β : Type v} (b : α → List β) (as : List α) : List β := flatten (map b as)
-
 @[simp, grind =] theorem flatMap_nil {f : α → List β} : List.flatMap f [] = [] := by simp [List.flatMap]
 @[simp, grind =] theorem flatMap_cons {x : α} {xs : List α} {f : α → List β} :
   List.flatMap f (x :: xs) = f x ++ List.flatMap f xs := by simp [List.flatMap]
 
+@[simp, grind _=_] theorem flatMap_append {xs ys : List α} {f : α → List β} :
+    (xs ++ ys).flatMap f = xs.flatMap f ++ ys.flatMap f := by
+  induction xs; {rfl}; simp_all [flatMap_cons, append_assoc]
+
 /-! ### replicate -/
 
 /--
diff --git a/src/Init/Data/List/Lemmas.lean b/src/Init/Data/List/Lemmas.lean
index 81ccbff94d..14c37995ec 100644
--- a/src/Init/Data/List/Lemmas.lean
+++ b/src/Init/Data/List/Lemmas.lean
@@ -348,6 +348,18 @@ theorem ext_getElem {l₁ l₂ : List α} (hl : length l₁ = length l₂)
 theorem getElem?_concat_length {l : List α} {a : α} : (l ++ [a])[l.length]? = some a := by
   simp
 
+theorem eq_getElem_of_length_eq_one : (l : List α) → (hl : l.length = 1) → l = [l[0]'(hl ▸ by decide)]
+  | [_], _ => rfl
+
+theorem eq_getElem_of_length_eq_two : (l : List α) → (hl : l.length = 2) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide)]
+  | [_, _], _ => rfl
+
+theorem eq_getElem_of_length_eq_three : (l : List α) → (hl : l.length = 3) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide), l[2]'(hl ▸ by decide)]
+  | [_, _, _], _ => rfl
+
+theorem eq_getElem_of_length_eq_four : (l : List α) → (hl : l.length = 4) → l = [l[0]'(hl ▸ by decide), l[1]'(hl ▸ by decide), l[2]'(hl ▸ by decide), l[3]'(hl ▸ by decide)]
+  | [_, _, _, _], _ => rfl
+
 /-! ### getD
 
 We simplify away `getD`, replacing `getD l n a` with `(l[n]?).getD a`.
@@ -2136,10 +2148,6 @@ theorem flatMap_singleton (f : α → List β) (x : α) : [x].flatMap f = f x :=
     simp only [findSome?_cons]
     split <;> simp_all
 
-@[simp, grind _=_] theorem flatMap_append {xs ys : List α} {f : α → List β} :
-    (xs ++ ys).flatMap f = xs.flatMap f ++ ys.flatMap f := by
-  induction xs; {rfl}; simp_all [flatMap_cons, append_assoc]
-
 theorem flatMap_assoc {l : List α} {f : α → List β} {g : β → List γ} :
     (l.flatMap f).flatMap g = l.flatMap fun x => (f x).flatMap g := by
   induction l <;> simp [*]
diff --git a/src/Init/Data/List/Pairwise.lean b/src/Init/Data/List/Pairwise.lean
index 6c9b13b6a8..0bc2fb17c7 100644
--- a/src/Init/Data/List/Pairwise.lean
+++ b/src/Init/Data/List/Pairwise.lean
@@ -171,7 +171,7 @@ theorem pairwise_append_comm {R : α → α → Prop} (s : ∀ {x y}, R x y →
   induction L with
   | nil => simp
   | cons l L IH =>
-    simp only [flatten, pairwise_append, IH, mem_flatten, exists_imp, and_imp, forall_mem_cons,
+    simp only [flatten_cons, pairwise_append, IH, mem_flatten, exists_imp, and_imp, forall_mem_cons,
       pairwise_cons, and_assoc, and_congr_right_iff]
     rw [and_comm, and_congr_left_iff]
     intros; exact ⟨fun h l' b c d e => h c d e l' b, fun h c d e l' b => h l' b c d e⟩
diff --git a/src/Init/Data/String.lean b/src/Init/Data/String.lean
index 0dba31ee4f..c4c104ce89 100644
--- a/src/Init/Data/String.lean
+++ b/src/Init/Data/String.lean
@@ -8,6 +8,7 @@ module
 prelude
 public import Init.Data.String.Basic
 public import Init.Data.String.Bootstrap
+public import Init.Data.String.Decode
 public import Init.Data.String.Extra
 public import Init.Data.String.Lemmas
 public import Init.Data.String.Repr
diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean
index 39c1e76c1b..6bd2c13aec 100644
--- a/src/Init/Data/String/Basic.lean
+++ b/src/Init/Data/String/Basic.lean
@@ -9,11 +9,370 @@ prelude
 public import Init.Data.List.Basic
 public import Init.Data.Char.Basic
 public import Init.Data.String.Bootstrap
+public import Init.Data.ByteArray.Basic
+public import Init.Data.String.Decode
+import Init.Data.ByteArray.Lemmas
 
 public section
 
 universe u
 
+section
+
+@[simp]
+theorem List.utf8Encode_nil : List.utf8Encode [] = ByteArray.empty := by simp [utf8Encode]
+
+theorem List.utf8Encode_singleton {c : Char} : [c].utf8Encode = (String.utf8EncodeChar c).toByteArray := by
+  simp [utf8Encode]
+
+@[simp]
+theorem List.utf8Encode_append {l l' : List Char} :
+    (l ++ l').utf8Encode = l.utf8Encode ++ l'.utf8Encode := by
+  simp [utf8Encode]
+
+theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by
+  rw [← singleton_append, List.utf8Encode_append]
+
+@[simp]
+theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by
+  fun_cases String.utf8EncodeChar with simp
+
+@[simp]
+theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by
+  simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem]
+
+theorem ByteArray.isValidUtf8_utf8Encode {l : List Char} : IsValidUtf8 l.utf8Encode :=
+  .intro l rfl
+
+@[simp]
+theorem ByteArray.isValidUtf8_empty : IsValidUtf8 ByteArray.empty :=
+  .intro [] (by simp)
+
+theorem Char.isValidUtf8_toByteArray_utf8EncodeChar {c : Char} :
+    ByteArray.IsValidUtf8 (String.utf8EncodeChar c).toByteArray :=
+  .intro [c] (by simp [List.utf8Encode_singleton])
+
+theorem ByteArray.IsValidUtf8.append {b b' : ByteArray} (h : IsValidUtf8 b) (h' : IsValidUtf8 b') :
+    IsValidUtf8 (b ++ b') := by
+  rcases h with ⟨m, rfl⟩
+  rcases h' with ⟨m', rfl⟩
+  exact .intro (m ++ m') (by simp)
+
+theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} :
+    IsValidUtf8 ([c].utf8Encode ++ b) ↔ IsValidUtf8 b := by
+  refine ⟨?_, fun h => IsValidUtf8.append isValidUtf8_utf8Encode h⟩
+  rintro ⟨l, hl⟩
+  match l with
+  | [] => simp at hl
+  | d::l =>
+    obtain rfl : c = d := by
+      replace hl := congrArg (fun l => utf8DecodeChar? l 0) hl
+      simpa [List.utf8DecodeChar?_utf8Encode_singleton_append,
+        List.utf8DecodeChar?_utf8Encode_cons] using hl
+    rw [← List.singleton_append (l := l), List.utf8Encode_append,
+      ByteArray.append_right_inj] at hl
+    exact hl ▸ isValidUtf8_utf8Encode
+
+@[expose]
+def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) :=
+  go (b.size + 1) 0 #[] (by simp) (by simp)
+where
+  go (fuel : Nat) (i : Nat) (acc : Array Char) (hi : i ≤ b.size) (hf : b.size - i < fuel) : Option (Array Char) :=
+    match fuel, hf with
+    | fuel + 1, _ =>
+      if i = b.size then
+        some acc
+      else
+        match h : utf8DecodeChar? b i with
+        | none => none
+        | some c => go fuel (i + c.utf8Size) (acc.push c)
+            (le_size_of_utf8DecodeChar?_eq_some h)
+            (have := c.utf8Size_pos; have := le_size_of_utf8DecodeChar?_eq_some h; by omega)
+  termination_by structural fuel
+
+theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'}
+    (hbb' : b = b') (hii' : i = i') (hacc : acc = acc') :
+    ByteArray.utf8Decode?.go b fuel i acc hi hf = ByteArray.utf8Decode?.go b' fuel' i' acc' hi' hf' := by
+  subst hbb' hii' hacc
+  fun_induction ByteArray.utf8Decode?.go b fuel i acc hi hf generalizing fuel' with
+  | case1 =>
+    rw [go.eq_def]
+    split
+    simp
+  | case2 =>
+    rw [go.eq_def]
+    split <;> split
+    · simp_all
+    · split <;> simp_all
+  | case3 =>
+    conv => rhs; rw [go.eq_def]
+    split <;> split
+    · simp_all
+    · split
+      · simp_all
+      · rename_i c₁ hc₁ ih _ _ _ _ _ c₂ hc₂
+        obtain rfl : c₁ = c₂ := by rw [← Option.some_inj, ← hc₁, ← hc₂]
+        apply ih
+
+@[simp]
+theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := by
+  simp [utf8Decode?, utf8Decode?.go]
+
+private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
+    (ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUtf8 (b.extract i b.size) := by
+  fun_induction ByteArray.utf8Decode?.go with
+  | case1 => simp
+  | case2 fuel i hi hf acc h₁ h₂ =>
+    simp only [Option.isSome_none, Bool.false_eq_true, false_iff]
+    rintro ⟨l, hl⟩
+    have : l ≠ [] := by
+      rintro rfl
+      simp at hl
+      omega
+    rw [← l.cons_head_tail this] at hl
+    rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract, hl, List.utf8DecodeChar?_utf8Encode_cons] at h₂
+    simp at h₂
+  | case3 i acc hi fuel hf h₁ c h₂ ih =>
+    rw [ih]
+    have h₂' := h₂
+    rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂'
+    obtain ⟨l, hl⟩ := exists_of_utf8DecodeChar?_eq_some h₂'
+    rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega)
+      (le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢
+    rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega),
+      ← List.utf8Encode_singleton, isValidUtf8_utf8Encode_singleton_append_iff]
+
+theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} :
+    b.utf8Decode?.isSome ↔ IsValidUtf8 b := by
+  rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size]
+
+@[simp]
+theorem String.bytes_empty : "".bytes = ByteArray.empty := (rfl)
+
+/--
+Appends two strings. Usually accessed via the `++` operator.
+
+The internal implementation will perform destructive updates if the string is not shared.
+
+Examples:
+ * `"abc".append "def" = "abcdef"`
+ * `"abc" ++ "def" = "abcdef"`
+ * `"" ++ "" = ""`
+-/
+@[extern "lean_string_append", expose]
+def String.append (s t : String) : String where
+  bytes := s.bytes ++ t.bytes
+  isValidUtf8 := s.isValidUtf8.append t.isValidUtf8
+
+instance : Append String where
+  append s t := s.append t
+
+@[simp]
+theorem String.bytes_append {s t : String} : (s ++ t).bytes = s.bytes ++ t.bytes := (rfl)
+
+theorem String.bytes_inj {s t : String} : s.bytes = t.bytes ↔ s = t := by
+  refine ⟨fun h => ?_, (· ▸ rfl)⟩
+  rcases s with ⟨s⟩
+  rcases t with ⟨t⟩
+  subst h
+  rfl
+
+@[simp]
+theorem String.empty_append {s : String} : "" ++ s = s := by
+  simp [← String.bytes_inj]
+
+@[simp]
+theorem String.append_empty {s : String} : s ++ "" = s := by
+  simp [← String.bytes_inj]
+
+@[simp] theorem List.bytes_asString {l : List Char} : l.asString.bytes = l.utf8Encode := by
+  simp [List.asString, String.mk]
+
+@[simp]
+theorem List.asString_nil : List.asString [] = "" := by
+  simp [← String.bytes_inj]
+
+@[simp]
+theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString = l₁.asString ++ l₂.asString := by
+  simp [← String.bytes_inj]
+
+@[expose]
+def String.Internal.toArray (b : String) : Array Char :=
+  b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUtf8)
+
+@[simp]
+theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by
+  simp [toArray]
+
+@[extern "lean_string_data", expose]
+def String.data (b : String) : List Char :=
+  (String.Internal.toArray b).toList
+
+@[simp]
+theorem String.data_empty : "".data = [] := by
+  simp [data]
+
+/--
+Returns the length of a string in Unicode code points.
+
+Examples:
+* `"".length = 0`
+* `"abc".length = 3`
+* `"L∃∀N".length = 4`
+-/
+@[extern "lean_string_length"]
+def String.length (b : String) : Nat :=
+  b.data.length
+
+@[simp]
+theorem String.Internal.size_toArray {b : String} : (String.Internal.toArray b).size = b.length :=
+  (rfl)
+
+@[simp]
+theorem String.length_data {b : String} : b.data.length = b.length := (rfl)
+
+theorem String.exists_eq_asString (s : String) :
+    ∃ l : List Char, s = l.asString := by
+  rcases s with ⟨_, ⟨l, rfl⟩⟩
+  refine ⟨l, by simp [← String.bytes_inj]⟩
+
+private theorem ByteArray.utf8Decode?go_eq_utf8Decode?go_extract {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} :
+    utf8Decode?.go b fuel i acc hi hf = (utf8Decode?.go (b.extract i b.size) fuel 0 #[] (by simp) (by simp [hf])).map (acc ++ ·) := by
+  fun_cases utf8Decode?.go b fuel i acc hi hf with
+  | case1 =>
+      rw [utf8Decode?.go]
+      simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
+        List.nil_append]
+      rw [if_pos (by omega)]
+      simp
+  | case2 fuel hf₁ h₁ h₂ hf₂ =>
+    rw [utf8Decode?.go]
+    simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
+      List.nil_append]
+    rw [if_neg (by omega)]
+    rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
+    split <;> simp_all
+  | case3 fuel hf₁ h₁ c h₂ hf₂ =>
+    conv => rhs; rw [utf8Decode?.go]
+    simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Nat.zero_add, List.push_toArray,
+      List.nil_append]
+    rw [if_neg (by omega)]
+    rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h₂
+    split
+    · simp_all
+    · rename_i c' hc'
+      obtain rfl : c = c' := by
+        rw [← Option.some_inj, ← h₂, hc']
+      have := c.utf8Size_pos
+      conv => lhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
+      conv => rhs; rw [ByteArray.utf8Decode?go_eq_utf8Decode?go_extract]
+      simp only [size_extract, Nat.le_refl, Nat.min_eq_left, Option.map_map, ByteArray.extract_extract]
+      have : (fun x => acc ++ x) ∘ (fun x => #[c] ++ x) = fun x => acc.push c ++ x := by funext; simp
+      simp [(by omega : i + (b.size - i) = b.size), this]
+
+theorem ByteArray.utf8Decode?_utf8Encode_singleton_append {l : ByteArray} {c : Char} :
+    ([c].utf8Encode ++ l).utf8Decode? = l.utf8Decode?.map (#[c] ++ ·) := by
+  rw [utf8Decode?, utf8Decode?.go,
+    if_neg (by simp [List.utf8Encode_singleton]; have := c.utf8Size_pos; omega)]
+  split
+  · simp_all [List.utf8DecodeChar?_utf8Encode_singleton_append]
+  · rename_i d h
+    obtain rfl : c = d := by simpa [List.utf8DecodeChar?_utf8Encode_singleton_append] using h
+    rw [utf8Decode?go_eq_utf8Decode?go_extract, utf8Decode?]
+    simp only [List.push_toArray, List.nil_append, Nat.zero_add]
+    congr 1
+    apply ByteArray.utf8Decode?.go.congr _ rfl rfl
+    apply extract_append_eq_right
+    simp [List.utf8Encode_singleton]
+
+@[simp]
+theorem List.utf8Decode?_utf8Encode {l : List Char} :
+    l.utf8Encode.utf8Decode? = some l.toArray := by
+  induction l with
+  | nil => simp
+  | cons c l ih =>
+    rw [← List.singleton_append, List.utf8Encode_append]
+    simp only [ByteArray.utf8Decode?_utf8Encode_singleton_append, cons_append, nil_append,
+      Option.map_eq_some_iff, Array.append_eq_toArray_iff, cons.injEq, true_and]
+    refine ⟨l.toArray, ih, by simp⟩
+
+@[simp]
+theorem ByteArray.utf8Encode_get_utf8Decode? {b : ByteArray} {h} :
+    (b.utf8Decode?.get h).toList.utf8Encode = b := by
+  obtain ⟨l, rfl⟩ := isSome_utf8Decode?_iff.1 h
+  simp
+
+@[simp]
+theorem List.data_asString {l : List Char} : l.asString.data = l := by
+  simp [String.data, String.Internal.toArray]
+
+@[simp]
+theorem String.asString_data {b : String} : b.data.asString = b := by
+  obtain ⟨l, rfl⟩ := String.exists_eq_asString b
+  rw [List.data_asString]
+
+theorem List.asString_injective {l₁ l₂ : List Char} (h : l₁.asString = l₂.asString) : l₁ = l₂ := by
+  simpa using congrArg String.data h
+
+theorem List.asString_inj {l₁ l₂ : List Char} : l₁.asString = l₂.asString ↔ l₁ = l₂ :=
+  ⟨asString_injective, (· ▸ rfl)⟩
+
+theorem String.data_injective {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ := by
+  simpa using congrArg List.asString h
+
+theorem String.data_inj {s₁ s₂ : String} : s₁.data = s₂.data ↔ s₁ = s₂ :=
+  ⟨data_injective, (· ▸ rfl)⟩
+
+@[simp]
+theorem String.data_append {l₁ l₂ : String} : (l₁ ++ l₂).data = l₁.data ++ l₂.data := by
+  apply List.asString_injective
+  simp
+
+@[simp]
+theorem String.utf8encode_data {b : String} : b.data.utf8Encode = b.bytes := by
+  have := congrArg String.bytes (String.asString_data (b := b))
+  rwa [← List.bytes_asString]
+
+@[simp]
+theorem String.utf8ByteSize_empty : "".utf8ByteSize = 0 := (rfl)
+
+@[simp]
+theorem String.utf8ByteSize_append {s t : String} :
+    (s ++ t).utf8ByteSize = s.utf8ByteSize + t.utf8ByteSize := by
+  simp [utf8ByteSize]
+
+@[simp]
+theorem String.size_bytes {s : String} : s.bytes.size = s.utf8ByteSize := rfl
+
+@[simp]
+theorem String.bytes_push {s : String} {c : Char} : (s.push c).bytes = s.bytes ++ [c].utf8Encode := by
+  simp [push]
+
+-- This is just to keep the proof of `set_next_add` below from breaking; if that lemma goes away
+-- or the proof is rewritten, it can be removed.
+private noncomputable def String.utf8ByteSize' : String → Nat
+  | s => go s.data
+where
+  go : List Char → Nat
+  | []    => 0
+  | c::cs => go cs + c.utf8Size
+
+private theorem String.utf8ByteSize'_eq (s : String) : s.utf8ByteSize' = s.utf8ByteSize := by
+  suffices ∀ l, utf8ByteSize'.go l = l.asString.utf8ByteSize by
+    obtain ⟨m, rfl⟩ := s.exists_eq_asString
+    rw [utf8ByteSize', this, asString_data]
+  intro l
+  induction l with
+  | nil => simp [utf8ByteSize'.go]
+  | cons c cs ih =>
+    rw [utf8ByteSize'.go, ih, ← List.singleton_append, List.asString_append,
+      utf8ByteSize_append, Nat.add_comm]
+    congr
+    rw [← size_bytes, List.bytes_asString, List.utf8Encode_singleton,
+      List.size_toByteArray, length_utf8EncodeChar]
+
+end
+
 namespace String
 
 instance : HAdd String.Pos String.Pos String.Pos where
@@ -54,8 +413,6 @@ instance : LT String :=
 instance decidableLT (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
   List.decidableLT s₁.data s₂.data
 
-
-
 /--
 Non-strict inequality on strings, typically used via the `≤` operator.
 
@@ -69,32 +426,6 @@ instance : LE String :=
 instance decLE (s₁ s₂ : String) : Decidable (s₁ ≤ s₂) :=
   inferInstanceAs (Decidable (Not _))
 
-/--
-Returns the length of a string in Unicode code points.
-
-Examples:
-* `"".length = 0`
-* `"abc".length = 3`
-* `"L∃∀N".length = 4`
--/
-@[extern "lean_string_length", expose]
-def length : (@& String) → Nat
-  | ⟨s⟩ => s.length
-
-/--
-Appends two strings. Usually accessed via the `++` operator.
-
-The internal implementation will perform destructive updates if the string is not shared.
-
-Examples:
- * `"abc".append "def" = "abcdef"`
- * `"abc" ++ "def" = "abcdef"`
- * `"" ++ "" = ""`
--/
-@[extern "lean_string_append", expose]
-def append : String → (@& String) → String
-  | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩
-
 /--
 Converts a string to a list of characters.
 
@@ -153,8 +484,7 @@ Examples:
 -/
 @[extern "lean_string_utf8_get", expose]
 def get (s : @& String) (p : @& Pos) : Char :=
-  match s with
-  | ⟨s⟩ => utf8GetAux s 0 p
+  utf8GetAux s.data 0 p
 
 def utf8GetAux? : List Char → Pos → Pos → Option Char
   | [],    _, _ => none
@@ -175,7 +505,7 @@ Examples:
 -/
 @[extern "lean_string_utf8_get_opt"]
 def get? : (@& String) → (@& Pos) → Option Char
-  | ⟨s⟩, p => utf8GetAux? s 0 p
+  | s, p => utf8GetAux? s.data 0 p
 
 /--
 Returns the character at position `p` of a string. Panics if `p` is not a valid position.
@@ -191,7 +521,7 @@ Examples
 @[extern "lean_string_utf8_get_bang", expose]
 def get! (s : @& String) (p : @& Pos) : Char :=
   match s with
-  | ⟨s⟩ => utf8GetAux s 0 p
+  | s => utf8GetAux s.data 0 p
 
 def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char
   | [],    _, _ => []
@@ -214,7 +544,7 @@ Examples:
 -/
 @[extern "lean_string_utf8_set"]
 def set : String → (@& Pos) → Char → String
-  | ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩
+  | s, i, c => (utf8SetAux c s.data 0 i).asString
 
 /--
 Replaces the character at position `p` in the string `s` with the result of applying `f` to that
@@ -270,7 +600,7 @@ Examples:
 -/
 @[extern "lean_string_utf8_prev", expose]
 def prev : (@& String) → (@& Pos) → Pos
-  | ⟨s⟩, p => utf8PrevAux s 0 p
+  | s, p => utf8PrevAux s.data 0 p
 
 /--
 Returns the first character in `s`. If `s = ""`, returns `(default : Char)`.
@@ -336,7 +666,7 @@ Examples:
 @[extern "lean_string_utf8_get_fast", expose]
 def get' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Char :=
   match s with
-  | ⟨s⟩ => utf8GetAux s 0 p
+  | s => utf8GetAux s.data 0 p
 
 /--
 Returns the next position in a string after position `p`. The result is unspecified if `p` is not a
@@ -360,16 +690,6 @@ def next' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Pos :=
   let c := get s p
   p + c
 
-theorem _root_.Char.utf8Size_pos (c : Char) : 0 < c.utf8Size := by
-  repeat first | apply iteInduction (motive := (0 < ·)) <;> intros | decide
-
-theorem _root_.Char.utf8Size_le_four (c : Char) : c.utf8Size ≤ 4 := by
-  repeat first | apply iteInduction (motive := (· ≤ 4)) <;> intros | decide
-
-theorem _root_.Char.utf8Size_eq (c : Char) : c.utf8Size = 1 ∨ c.utf8Size = 2 ∨ c.utf8Size = 3 ∨ c.utf8Size = 4 := by
-  match c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
-  | 1, _, _ | 2, _, _ | 3, _, _ | 4, _, _ => simp
-
 @[deprecated Char.utf8Size_pos (since := "2026-06-04")] abbrev one_le_csize := Char.utf8Size_pos
 
 @[simp] theorem pos_lt_eq (p₁ p₂ : Pos) : (p₁ < p₂) = (p₁.1 < p₂.1) := rfl
@@ -534,7 +854,7 @@ Examples:
 -/
 @[extern "lean_string_utf8_extract", expose]
 def extract : (@& String) → (@& Pos) → (@& Pos) → String
-  | ⟨s⟩, b, e => if b.byteIdx ≥ e.byteIdx then "" else ⟨go₁ s 0 b e⟩
+  | s, b, e => if b.byteIdx ≥ e.byteIdx then "" else (go₁ s.data 0 b e).asString
 where
   go₁ : List Char → Pos → Pos → Pos → List Char
     | [],        _, _, _ => []
@@ -1030,37 +1350,31 @@ theorem utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos}, i > p 
 theorem set_next_add (s : String) (i : Pos) (c : Char) (b₁ b₂)
     (h : (s.next i).1 + b₁ = s.endPos.1 + b₂) :
     ((s.set i c).next i).1 + b₁ = (s.set i c).endPos.1 + b₂ := by
-  simp [next, get, set, endPos, utf8ByteSize] at h ⊢
+  simp [next, get, set, endPos, ← utf8ByteSize'_eq, utf8ByteSize'] at h ⊢
   rw [Nat.add_comm i.1, Nat.add_assoc] at h ⊢
   let rec foo : ∀ cs a b₁ b₂,
-    (utf8GetAux cs a i).utf8Size + b₁ = utf8ByteSize.go cs + b₂ →
-    (utf8GetAux (utf8SetAux c cs a i) a i).utf8Size + b₁ = utf8ByteSize.go (utf8SetAux c cs a i) + b₂
+    (utf8GetAux cs a i).utf8Size + b₁ = utf8ByteSize'.go cs + b₂ →
+    (utf8GetAux (utf8SetAux c cs a i) a i).utf8Size + b₁ = utf8ByteSize'.go (utf8SetAux c cs a i) + b₂
   | [], _, _, _, h => h
   | c'::cs, a, b₁, b₂, h => by
     unfold utf8SetAux
-    apply iteInduction (motive := fun p => (utf8GetAux p a i).utf8Size + b₁ = utf8ByteSize.go p + b₂) <;>
-      intro h' <;> simp [utf8GetAux, h', utf8ByteSize.go] at h ⊢
+    apply iteInduction (motive := fun p => (utf8GetAux p a i).utf8Size + b₁ = utf8ByteSize'.go p + b₂) <;>
+      intro h' <;> simp [utf8GetAux, h', utf8ByteSize'.go] at h ⊢
     next =>
       rw [Nat.add_assoc, Nat.add_left_comm] at h ⊢; rw [Nat.add_left_cancel h]
     next =>
       rw [Nat.add_assoc] at h ⊢
       refine foo cs (a + c') b₁ (c'.utf8Size + b₂) h
-  exact foo s.1 0 _ _ h
+  exact foo s.data 0 _ _ h
 
 theorem mapAux_lemma (s : String) (i : Pos) (c : Char) (h : ¬s.atEnd i) :
-    (s.set i c).endPos.1 - ((s.set i c).next i).1 < s.endPos.1 - i.1 :=
+    (s.set i c).endPos.1 - ((s.set i c).next i).1 < s.endPos.1 - i.1 := by
   suffices (s.set i c).endPos.1 - ((s.set i c).next i).1 = s.endPos.1 - (s.next i).1 by
     rw [this]
     apply Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (lt_next ..)
-  Nat.sub.elim (motive := (_ = ·)) _ _
-    (fun _ k e =>
-      have := set_next_add _ _ _ k 0 e.symm
-      Nat.sub_eq_of_eq_add <| this.symm.trans <| Nat.add_comm ..)
-    (fun h => by
-      have ⟨k, e⟩ := Nat.le.dest h
-      rw [Nat.succ_add] at e
-      have : ((s.set i c).next i).1 = _ := set_next_add _ _ c 0 k.succ e.symm
-      exact Nat.sub_eq_zero_of_le (this ▸ Nat.le_add_right ..))
+  have := set_next_add s i c (s.endPos.byteIdx - (s.next i).byteIdx) 0
+  have := set_next_add s i c 0 ((s.next i).byteIdx - s.endPos.byteIdx)
+  omega
 
 @[specialize] def mapAux (f : Char → Char) (i : Pos) (s : String) : String :=
   if h : s.atEnd i then s
@@ -2044,40 +2358,51 @@ def stripSuffix (s : String) (suff : String) : String :=
 
 end String
 
-namespace Char
-
-@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := rfl
-
-end Char
-
 namespace String
 
+@[ext]
 theorem ext {s₁ s₂ : String} (h : s₁.data = s₂.data) : s₁ = s₂ :=
-  show ⟨s₁.data⟩ = (⟨s₂.data⟩ : String) from h ▸ rfl
-
-theorem ext_iff {s₁ s₂ : String} : s₁ = s₂ ↔ s₁.data = s₂.data := ⟨fun h => h ▸ rfl, ext⟩
+  data_injective h
 
 @[simp] theorem default_eq : default = "" := rfl
 
-@[simp] theorem length_mk (s : List Char) : (String.mk s).length = s.length := rfl
+@[simp]
+theorem String.mk_eq_asString (s : List Char) : String.mk s = List.asString s := rfl
 
-@[simp] theorem length_empty : "".length = 0 := rfl
+@[simp]
+theorem _root_.List.length_asString (s : List Char) : (List.asString s).length = s.length := by
+  rw [← length_data, List.data_asString]
 
-@[simp] theorem length_singleton (c : Char) : (String.singleton c).length = 1 := rfl
+@[simp] theorem length_empty : "".length = 0 := by simp [← length_data, data_empty]
+
+@[simp]
+theorem bytes_singleton {c : Char} : (String.singleton c).bytes = [c].utf8Encode := by
+  simp [singleton]
+
+theorem singleton_eq {c : Char} : String.singleton c = [c].asString := by
+  simp [← bytes_inj]
+
+@[simp] theorem data_singleton (c : Char) : (String.singleton c).data = [c] := by
+  simp [singleton_eq]
+
+@[simp]
+theorem length_singleton {c : Char} : (String.singleton c).length = 1 := by
+  simp [← length_data]
+
+theorem push_eq_append (c : Char) : String.push s c = s ++ singleton c := by
+  simp [← bytes_inj]
+
+@[simp] theorem data_push (c : Char) : (String.push s c).data = s.data ++ [c] := by
+  simp [push_eq_append]
 
 @[simp] theorem length_push (c : Char) : (String.push s c).length = s.length + 1 := by
-  rw [push, length_mk, List.length_append, List.length_singleton, Nat.succ.injEq]
-  rfl
+  simp [← length_data]
 
 @[simp] theorem length_pushn (c : Char) (n : Nat) : (pushn s c n).length = s.length + n := by
   unfold pushn; induction n <;> simp [Nat.repeat, Nat.add_assoc, *]
 
 @[simp] theorem length_append (s t : String) : (s ++ t).length = s.length + t.length := by
-  simp only [length, append, List.length_append]
-
-@[simp] theorem data_push (s : String) (c : Char) : (s.push c).data = s.data ++ [c] := rfl
-
-@[simp] theorem data_append (s t : String) : (s ++ t).data = s.data ++ t.data := rfl
+  simp [← length_data]
 
 attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas
 
@@ -2161,10 +2486,8 @@ end Pos
 theorem lt_next' (s : String) (p : Pos) : p < next s p := lt_next ..
 
 @[simp] theorem prev_zero (s : String) : prev s 0 = 0 := by
-  cases s with | mk cs
-  cases cs
-  next => rfl
-  next => simp [prev, utf8PrevAux, Pos.le_iff]
+  rw [prev]
+  cases s.data <;> simp [utf8PrevAux, Pos.le_iff]
 
 @[simp] theorem get'_eq (s : String) (p : Pos) (h) : get' s p h = get s p := rfl
 
@@ -2174,19 +2497,20 @@ theorem lt_next' (s : String) (p : Pos) : p < next s p := lt_next ..
 -- so for proving can be unfolded.
 attribute [simp] toSubstring'
 
-theorem singleton_eq (c : Char) : singleton c = ⟨[c]⟩ := rfl
-
-@[simp] theorem data_singleton (c : Char) : (singleton c).data = [c] := rfl
-
-@[simp] theorem append_empty (s : String) : s ++ "" = s := ext (List.append_nil _)
-
-@[simp] theorem empty_append (s : String) : "" ++ s = s := rfl
-
 theorem append_assoc (s₁ s₂ s₃ : String) : (s₁ ++ s₂) ++ s₃ = s₁ ++ (s₂ ++ s₃) :=
-  ext (List.append_assoc ..)
+  ext (by simp [data_append])
 
 end String
 
+namespace Char
+
+theorem toString_eq_singleton {c : Char} : c.toString = String.singleton c := rfl
+
+@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := by
+  simp [toString_eq_singleton]
+
+end Char
+
 open String
 
 namespace Substring
diff --git a/src/Init/Data/String/Bootstrap.lean b/src/Init/Data/String/Bootstrap.lean
index eae8eb2faf..52e09c5fd2 100644
--- a/src/Init/Data/String/Bootstrap.lean
+++ b/src/Init/Data/String/Bootstrap.lean
@@ -8,6 +8,7 @@ module
 prelude
 public import Init.Data.List.Basic
 public import Init.Data.Char.Basic
+public import Init.Data.ByteArray.Bootstrap
 
 public section
 
@@ -31,7 +32,11 @@ Examples:
 -/
 @[extern "lean_string_push", expose]
 def push : String → Char → String
-  | ⟨s⟩, c => ⟨s ++ [c]⟩
+  | ⟨b, h⟩, c => ⟨b.append (List.utf8Encode [c]), ?pf⟩
+where finally
+  have ⟨m, hm⟩ := h
+  cases hm
+  exact .intro (m ++ [c]) (by simp [List.utf8Encode, List.toByteArray_append'])
 
 /--
 Returns a new string that contains only the character `c`.
@@ -124,8 +129,21 @@ Examples:
  * `[].asString = ""`
  * `['a', 'a', 'a'].asString = "aaa"`
 -/
+@[extern "lean_string_mk", expose]
+def String.mk (data : List Char) : String :=
+  ⟨List.utf8Encode data,.intro data rfl⟩
+
+/--
+Creates a string that contains the characters in a list, in order.
+
+Examples:
+ * `['L', '∃', '∀', 'N'].asString = "L∃∀N"`
+ * `[].asString = ""`
+ * `['a', 'a', 'a'].asString = "aaa"`
+-/
+@[expose]
 def List.asString (s : List Char) : String :=
-  ⟨s⟩
+  String.mk s
 
 namespace Substring.Internal
 
diff --git a/src/Init/Data/String/Decode.lean b/src/Init/Data/String/Decode.lean
new file mode 100644
index 0000000000..7793abe91b
--- /dev/null
+++ b/src/Init/Data/String/Decode.lean
@@ -0,0 +1,1238 @@
+/-
+Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Author: Markus Himmel
+-/
+module
+
+prelude
+public import Init.Data.List.Lemmas
+import Init.Data.List.Lemmas
+public import Init.Data.UInt.Bitwise
+import Init.Data.Char.Lemmas
+public import Init.Data.ByteArray.Basic
+import Init.Data.ByteArray.Lemmas
+
+/-! # `Char.utf8Size` -/
+
+public theorem Char.utf8Size_eq_one_iff {c : Char} : c.utf8Size = 1 ↔ c.val ≤ 127 := by
+  fun_cases utf8Size
+  all_goals simp_all; assumption
+
+public theorem Char.utf8Size_eq_two_iff {c : Char} : c.utf8Size = 2 ↔ 127 < c.val ∧ c.val ≤ 0x7ff := by
+  fun_cases utf8Size with
+  | case1 v _ => subst v; simp_all [← UInt32.not_le]
+  | case2 v _ _ => subst v; simp_all
+  | case3 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+  | case4 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+
+public theorem Char.utf8Size_eq_three_iff {c : Char} : c.utf8Size = 3 ↔ 0x7ff < c.val ∧ c.val ≤ 0xffff := by
+  fun_cases utf8Size with
+  | case1 v _ => subst v; simp_all [UInt32.lt_iff_toNat_lt, UInt32.le_iff_toNat_le]; omega
+  | case2 v _ _ => subst v; simp_all [UInt32.lt_iff_toNat_lt, UInt32.le_iff_toNat_le]; omega
+  | case3 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+  | case4 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+
+public theorem Char.utf8Size_eq_four_iff {c : Char} : c.utf8Size = 4 ↔ 0xffff < c.val := by
+  fun_cases utf8Size with
+  | case1 v _ => subst v; simp_all [UInt32.lt_iff_toNat_lt, UInt32.le_iff_toNat_le]; omega
+  | case2 v _ _ => subst v; simp_all [UInt32.lt_iff_toNat_lt, UInt32.le_iff_toNat_le]; omega
+  | case3 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+  | case4 v _ _ _ => subst v; simp_all [← UInt32.not_le]
+
+public theorem Char.utf8Size_pos (c : Char) : 0 < c.utf8Size := by
+  repeat first | apply iteInduction (motive := (0 < ·)) <;> intros | decide
+
+public theorem Char.utf8Size_le_four (c : Char) : c.utf8Size ≤ 4 := by
+  repeat first | apply iteInduction (motive := (· ≤ 4)) <;> intros | decide
+
+public theorem Char.utf8Size_eq (c : Char) : c.utf8Size = 1 ∨ c.utf8Size = 2 ∨ c.utf8Size = 3 ∨ c.utf8Size = 4 := by
+  match c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
+  | 1, _, _ | 2, _, _ | 3, _, _ | 4, _, _ => simp
+
+theorem Char.toNat_val_le {c : Char} : c.val.toNat ≤ 0x10ffff := by
+  have := c.valid
+  simp [UInt32.isValidChar, Nat.isValidChar] at this
+  omega
+
+/-! # `utf8EncodeChar` -/
+
+/-! ## `utf8EncodeChar` low-level API -/
+
+/--
+Returns the sequence of bytes in a character's UTF-8 encoding.
+-/
+@[expose]
+public def String.utf8EncodeCharFast (c : Char) : List UInt8 :=
+  let v := c.val
+  if v ≤ 0x7f then
+    [v.toUInt8]
+  else if v ≤ 0x7ff then
+    [(v >>>  6).toUInt8 &&& 0x1f ||| 0xc0,
+              v.toUInt8 &&& 0x3f ||| 0x80]
+  else if v ≤ 0xffff then
+    [(v >>> 12).toUInt8 &&& 0x0f ||| 0xe0,
+     (v >>>  6).toUInt8 &&& 0x3f ||| 0x80,
+              v.toUInt8 &&& 0x3f ||| 0x80]
+  else
+    [(v >>> 18).toUInt8 &&& 0x07 ||| 0xf0,
+     (v >>> 12).toUInt8 &&& 0x3f ||| 0x80,
+     (v >>>  6).toUInt8 &&& 0x3f ||| 0x80,
+              v.toUInt8 &&& 0x3f ||| 0x80]
+
+private theorem Nat.add_two_pow_eq_or_of_lt {b : Nat} (i : Nat) (b_lt : b < 2 ^ i) (a : Nat) :
+    b + 2 ^ i * a = b ||| 2 ^ i * a := by
+  rw [Nat.add_comm, Nat.or_comm, Nat.two_pow_add_eq_or_of_lt b_lt]
+
+@[csimp]
+public theorem String.utf8EncodeChar_eq_utf8EncodeCharFast : @utf8EncodeChar = @utf8EncodeCharFast := by
+  funext c
+  simp only [utf8EncodeChar, utf8EncodeCharFast, UInt8.ofNat_uInt32ToNat, UInt8.ofNat_add,
+    UInt8.reduceOfNat, UInt32.le_iff_toNat_le, UInt32.reduceToNat]
+  split
+  · rfl
+  · split
+    · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
+        Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
+        UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
+      refine ⟨?_, ?_⟩
+      · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 5 (by omega) 6,
+          Nat.and_two_pow_sub_one_eq_mod _ 5, Nat.shiftRight_eq_div_pow,
+          Nat.mod_eq_of_lt (b := 256) (by omega)]
+      · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+          Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd _ (by decide)]
+    · split
+      · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
+          Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
+          UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
+        refine ⟨?_, ?_, ?_⟩
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 4 (by omega) 14,
+            Nat.and_two_pow_sub_one_eq_mod _ 4, Nat.shiftRight_eq_div_pow,
+            Nat.mod_eq_of_lt (b := 256) (by omega)]
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
+            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
+      · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
+          Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
+          UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
+        refine ⟨?_, ?_, ?_, ?_⟩
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 3 (by omega) 30,
+            Nat.and_two_pow_sub_one_eq_mod _ 3, Nat.shiftRight_eq_div_pow,
+            Nat.mod_mod_of_dvd _ (by decide)]
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
+            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 12) (by decide)]
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
+            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
+        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
+            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
+
+@[simp] public theorem String.length_utf8EncodeChar (c : Char) : (utf8EncodeChar c).length = c.utf8Size := by
+  simp [Char.utf8Size, utf8EncodeChar_eq_utf8EncodeCharFast, utf8EncodeCharFast]
+  cases Decidable.em (c.val ≤ 0x7f) <;> simp [*]
+  cases Decidable.em (c.val ≤ 0x7ff) <;> simp [*]
+  cases Decidable.em (c.val ≤ 0xffff) <;> simp [*]
+
+theorem String.utf8EncodeChar_eq_singleton {c : Char} : c.utf8Size = 1 →
+    String.utf8EncodeChar c = [c.val.toUInt8] := by
+  rw [← length_utf8EncodeChar]
+  fun_cases utf8EncodeChar
+  all_goals simp_all; try rfl
+
+theorem String.utf8EncodeChar_eq_cons_cons {c : Char} : c.utf8Size = 2 →
+    String.utf8EncodeChar c = [(c.val >>>  6).toUInt8 &&& 0x1f ||| 0xc0, c.val.toUInt8 &&& 0x3f ||| 0x80] := by
+  rw [← length_utf8EncodeChar, utf8EncodeChar_eq_utf8EncodeCharFast]
+  fun_cases utf8EncodeCharFast
+  all_goals simp_all <;> (repeat' apply And.intro) <;> rfl
+
+theorem String.utf8EncodeChar_eq_cons_cons_cons {c : Char} : c.utf8Size = 3 →
+    String.utf8EncodeChar c =
+    [(c.val >>> 12).toUInt8 &&& 0x0f ||| 0xe0,
+     (c.val >>>  6).toUInt8 &&& 0x3f ||| 0x80,
+              c.val.toUInt8 &&& 0x3f ||| 0x80] := by
+  rw [← length_utf8EncodeChar, utf8EncodeChar_eq_utf8EncodeCharFast]
+  fun_cases utf8EncodeCharFast
+  all_goals simp_all <;> (repeat' apply And.intro) <;> rfl
+
+theorem String.utf8EncodeChar_eq_cons_cons_cons_cons {c : Char} : c.utf8Size = 4 →
+    String.utf8EncodeChar c =
+    [(c.val >>> 18).toUInt8 &&& 0x07 ||| 0xf0,
+     (c.val >>> 12).toUInt8 &&& 0x3f ||| 0x80,
+     (c.val >>>  6).toUInt8 &&& 0x3f ||| 0x80,
+              c.val.toUInt8 &&& 0x3f ||| 0x80] := by
+  rw [← length_utf8EncodeChar, utf8EncodeChar_eq_utf8EncodeCharFast]
+  fun_cases utf8EncodeCharFast
+  all_goals simp_all <;> (repeat' apply And.intro) <;> rfl
+
+/-! ## `utf8EncodeChar` BitVec API -/
+
+theorem helper₄ (s : Nat) (c : BitVec w₀) (v : BitVec w') (w : Nat) :
+    (c >>> s).setWidth (w' + w) &&& (BitVec.allOnes w).setWidth (w' + w) ||| v ++ 0#w = v ++ c.extractLsb' s w := by
+  rw [BitVec.and_setWidth_allOnes, BitVec.or_append, BitVec.zero_or, BitVec.or_zero,
+    BitVec.setWidth_ushiftRight_eq_extractLsb, BitVec.setWidth_extractLsb'_of_le (by simp)]
+
+/-! ### Size one -/
+
+-- TODO: possibly it makes sense to factor out this proof
+theorem String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_one {c : Char} (h : c.utf8Size = 1) :
+    ((String.utf8EncodeChar c)[0]'(by simp [h])).toBitVec = 0#1 ++ c.val.toBitVec.extractLsb' 0 7 := by
+  have h₀ : c.val.toNat < 128 := by
+    suffices c.val.toNat ≤ 127 by omega
+    simpa [Char.utf8Size_eq_one_iff, UInt32.le_iff_toNat_le] using h
+  have h₁ : c.val.toNat < 256 := by omega
+  rw [← BitVec.toNat_inj, BitVec.toNat_append]
+  simp [utf8EncodeChar_eq_singleton h, Nat.mod_eq_of_lt h₀, Nat.mod_eq_of_lt h₁]
+
+/-! ### Size two -/
+
+theorem String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_two {c : Char} (h : c.utf8Size = 2) :
+    ((String.utf8EncodeChar c)[0]'(by simp [h])).toBitVec = 0b110#3 ++ c.val.toBitVec.extractLsb' 6 5 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons h] using helper₄ 6 c.val.toBitVec 6#3 5
+
+theorem String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_two {c : Char} (h : c.utf8Size = 2) :
+    ((String.utf8EncodeChar c)[1]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 0 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons h] using helper₄ 0 c.val.toBitVec 2#2 6
+
+/-! ### Size three -/
+
+theorem String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_three {c : Char} (h : c.utf8Size = 3) :
+    ((String.utf8EncodeChar c)[0]'(by simp [h])).toBitVec = 0b1110#4 ++ c.val.toBitVec.extractLsb' 12 4 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons h] using helper₄ 12 c.val.toBitVec 0b1110#4 4
+
+theorem String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_three {c : Char} (h : c.utf8Size = 3) :
+    ((String.utf8EncodeChar c)[1]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 6 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons h] using helper₄ 6 c.val.toBitVec 0b10#2 6
+
+theorem String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_three {c : Char} (h : c.utf8Size = 3) :
+    ((String.utf8EncodeChar c)[2]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 0 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons h] using helper₄ 0 c.val.toBitVec 0b10#2 6
+
+/-! ### Size four -/
+
+theorem String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_four {c : Char} (h : c.utf8Size = 4) :
+    ((String.utf8EncodeChar c)[0]'(by simp [h])).toBitVec = 0b11110#5 ++ c.val.toBitVec.extractLsb' 18 3 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons_cons h] using helper₄ 18 c.val.toBitVec 0b11110#5 3
+
+theorem String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_four {c : Char} (h : c.utf8Size = 4) :
+    ((String.utf8EncodeChar c)[1]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 12 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons_cons h] using helper₄ 12 c.val.toBitVec 0b10#2 6
+
+theorem String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_four {c : Char} (h : c.utf8Size = 4) :
+    ((String.utf8EncodeChar c)[2]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 6 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons_cons h] using helper₄ 6 c.val.toBitVec 0b10#2 6
+
+theorem String.toBitVec_getElem_utf8EncodeChar_three_of_utf8Size_eq_four {c : Char} (h : c.utf8Size = 4) :
+    ((String.utf8EncodeChar c)[3]'(by simp [h])).toBitVec = 0b10#2 ++ c.val.toBitVec.extractLsb' 0 6 := by
+  simpa [String.utf8EncodeChar_eq_cons_cons_cons_cons h] using helper₄ 0 c.val.toBitVec 0b10#2 6
+
+namespace ByteArray.utf8DecodeChar?
+
+/-! # `parseFirstByte` -/
+
+/-! ## `parseFirstByte` definition -/
+
+public inductive FirstByte where
+  | invalid : FirstByte
+  | done : FirstByte
+  | oneMore : FirstByte
+  | twoMore : FirstByte
+  | threeMore : FirstByte
+
+@[inline, expose]
+public def parseFirstByte (b : UInt8) : FirstByte :=
+  if b &&& 0x80 == 0 then
+    .done
+  else if b &&& 0xe0 == 0xc0 then
+    .oneMore
+  else if b &&& 0xf0 == 0xe0 then
+    .twoMore
+  else if b &&& 0xf8 == 0xf0 then
+    .threeMore
+  else .invalid
+
+/-! ## `parseFirstByte` low-level API -/
+
+theorem parseFirstByte_eq_done_iff : parseFirstByte b = .done ↔ b &&& 0x80 = 0 := by
+  fun_cases parseFirstByte with simp_all
+
+theorem parseFirstByte_eq_oneMore_iff : parseFirstByte b = .oneMore ↔ b &&& 0xe0 = 0xc0 := by
+  suffices b &&& 0xe0 = 0xc0 → b &&& 0x80 ≠ 0 by fun_cases parseFirstByte with simp_all
+  intro h
+  calc b &&& 0x80 = b &&& (0xe0 &&& 0x80) := rfl
+    _ = 0xc0 &&& 0x80 := by rw [← UInt8.and_assoc, h]
+    _ ≠ 0 := by decide
+
+theorem parseFirstByte_eq_twoMore_iff : parseFirstByte b = .twoMore ↔ b &&& 0xf0 = 0xe0 := by
+  suffices b &&& 0xf0 = 0xe0 → b &&& 0xe0 ≠ 0xc0 ∧ b &&& 0x80 ≠ 0 by fun_cases parseFirstByte with simp_all
+  refine fun h => ⟨?_, ?_⟩
+  · calc b &&& 0xe0 = b &&& (0xf0 &&& 0xe0) := rfl
+      _ = 0xe0 &&& 0xe0 := by rw [← UInt8.and_assoc, h]
+      _ ≠ 0xc0 := by decide
+  · calc b &&& 0x80 = b &&& (0xf0 &&& 0x80) := rfl
+      _ = 0xe0 &&& 0x80 := by rw [← UInt8.and_assoc, h]
+      _ ≠ 0 := by decide
+
+theorem parseFirstByte_eq_threeMore_iff : parseFirstByte b = .threeMore ↔ b &&& 0xf8 = 0xf0 := by
+  suffices b &&& 0xf8 = 0xf0 → b &&& 0xf0 ≠ 0xe0 ∧ b &&& 0xe0 ≠ 0xc0 ∧ b &&& 0x80 ≠ 0 by fun_cases parseFirstByte with simp_all
+  refine fun h => ⟨?_, ?_, ?_⟩
+  · calc b &&& 0xf0 = b &&& (0xf8 &&& 0xf0) := rfl
+      _ = 0xf0 &&& 0xf0 := by rw [← UInt8.and_assoc, h]
+      _ ≠ 0xe0 := by decide
+  · calc b &&& 0xe0 = b &&& (0xf8 &&& 0xe0) := rfl
+      _ = 0xf0 &&& 0xe0 := by rw [← UInt8.and_assoc, h]
+      _ ≠ 0xc0 := by decide
+  · calc b &&& 0x80 = b &&& (0xf8 &&& 0x80) := rfl
+      _ = 0xf0 &&& 0x80 := by rw [← UInt8.and_assoc, h]
+      _ ≠ 0 := by decide
+
+/-! ## `parseFirstByte` BitVec API -/
+
+theorem helper (w : Nat) (b : BitVec (w' + w)) (v : BitVec w') : b &&& (BitVec.allOnes w' ++ 0#w) = v ++ 0#w ↔ b.extractLsb' w w' = v := by
+  have h : b = b.extractLsb' w w' ++ b.extractLsb' 0 w := by
+    rw [BitVec.extractLsb'_append_extractLsb'_eq_extractLsb'] <;> simp
+  conv => lhs; rw [h]
+  rw [BitVec.and_append, BitVec.and_allOnes, BitVec.and_zero, BitVec.append_left_inj]
+
+theorem helper₂ (w : Nat) (b : BitVec (w' + w)) {v : BitVec w'} (h : b.extractLsb' w w' = v) : b = v ++ b.extractLsb' 0 w := by
+  rw [← h, BitVec.extractLsb'_append_extractLsb'_eq_extractLsb'] <;> simp
+
+/-! ### Size one -/
+
+theorem parseFirstByte_eq_done_iff_toBitVec : parseFirstByte b = .done ↔ b.toBitVec.extractLsb' 7 1 = 0#1 := by
+  simp only [parseFirstByte_eq_done_iff, ← UInt8.toBitVec_inj, UInt8.toBitVec_and,
+    UInt8.toBitVec_ofNat]
+  exact helper 7 b.toBitVec 0#1
+
+theorem toBitVec_eq_of_parseFirstByte_eq_done {b : UInt8} (h : parseFirstByte b = .done) :
+    b.toBitVec = 0#1 ++ b.toBitVec.setWidth 7 := by
+  exact helper₂ 7 b.toBitVec (parseFirstByte_eq_done_iff_toBitVec.1 h)
+
+/-! ### Size two -/
+
+theorem parseFirstByte_eq_oneMore_iff_toBitVec : parseFirstByte b = .oneMore ↔ b.toBitVec.extractLsb' 5 3 = 0b110#3 := by
+  simp only [parseFirstByte_eq_oneMore_iff, ← UInt8.toBitVec_inj, UInt8.toBitVec_and,
+    UInt8.toBitVec_ofNat]
+  exact helper 5 b.toBitVec 0b110#3
+
+theorem toBitVec_eq_of_parseFirstByte_eq_oneMore {b : UInt8} (h : parseFirstByte b = .oneMore) :
+    b.toBitVec = 0b110#3 ++ b.toBitVec.setWidth 5 := by
+  exact helper₂ 5 b.toBitVec (parseFirstByte_eq_oneMore_iff_toBitVec.1 h)
+
+/-! ### Size three -/
+
+theorem parseFirstByte_eq_twoMore_iff_toBitVec : parseFirstByte b = .twoMore ↔ b.toBitVec.extractLsb' 4 4 = 0b1110#4 := by
+  simp only [parseFirstByte_eq_twoMore_iff, ← UInt8.toBitVec_inj, UInt8.toBitVec_and,
+    UInt8.toBitVec_ofNat]
+  exact helper 4 b.toBitVec 0b1110#4
+
+theorem toBitVec_eq_of_parseFirstByte_eq_twoMore {b : UInt8} (h : parseFirstByte b = .twoMore) :
+    b.toBitVec = 0b1110#4 ++ b.toBitVec.setWidth 4 := by
+  exact helper₂ 4 b.toBitVec (parseFirstByte_eq_twoMore_iff_toBitVec.1 h)
+
+/-! ### Size four -/
+
+theorem parseFirstByte_eq_threeMore_iff_toBitVec : parseFirstByte b = .threeMore ↔ b.toBitVec.extractLsb' 3 5 = 0b11110#5 := by
+  simp only [parseFirstByte_eq_threeMore_iff, ← UInt8.toBitVec_inj, UInt8.toBitVec_and,
+    UInt8.toBitVec_ofNat]
+  exact helper 3 b.toBitVec 0b11110#5
+
+theorem toBitVec_eq_of_parseFirstByte_eq_threeMore {b : UInt8} (h : parseFirstByte b = .threeMore) :
+    b.toBitVec = 0b11110#5 ++ b.toBitVec.setWidth 3 := by
+  exact helper₂ 3 b.toBitVec (parseFirstByte_eq_threeMore_iff_toBitVec.1 h)
+
+/-! # `isInvalidContinuationByte` definiton & API -/
+
+@[inline, expose]
+public def isInvalidContinuationByte (b : UInt8) : Bool :=
+  b &&& 0xc0 != 0x80
+
+theorem isInvalidContinutationByte_eq_false_iff {b : UInt8} :
+    isInvalidContinuationByte b = false ↔ b &&& 0xc0 = 0x80 := by
+  simp [isInvalidContinuationByte]
+
+theorem isInvalidContinuationByte_eq_false_iff_toBitVec {b : UInt8} :
+    isInvalidContinuationByte b = false ↔ b.toBitVec.extractLsb' 6 2 = 0b10#2 := by
+  simp only [isInvalidContinuationByte, bne_eq_false_iff_eq, ← UInt8.toBitVec_inj,
+    UInt8.toBitVec_and, UInt8.toBitVec_ofNat]
+  exact helper 6 b.toBitVec 0b10#2
+
+theorem toBitVec_eq_of_isInvalidContinuationByte_eq_false {b : UInt8} (hb : isInvalidContinuationByte b = false) :
+    b.toBitVec = 0b10#2 ++ b.toBitVec.setWidth 6 := by
+  exact helper₂ 6 b.toBitVec (isInvalidContinuationByte_eq_false_iff_toBitVec.1 hb)
+
+/-! # `parseFirstByte`, `isInvalidContinuationByte` and `utf8EncodeChar` -/
+
+theorem parseFirstByte_utf8EncodeChar_eq_done {c : Char} (hc : c.utf8Size = 1) :
+    parseFirstByte ((String.utf8EncodeChar c)[0]'(by simp [Char.utf8Size_pos])) = .done := by
+  rw [parseFirstByte_eq_done_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_one hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem parseFirstByte_utf8EncodeChar_eq_oneMore {c : Char} (hc : c.utf8Size = 2) :
+    parseFirstByte ((String.utf8EncodeChar c)[0]'(by simp [Char.utf8Size_pos])) = .oneMore := by
+  rw [parseFirstByte_eq_oneMore_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_two hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_two {c : Char} (hc : c.utf8Size = 2) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[1]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_two hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem parseFirstByte_utf8EncodeChar_eq_twoMore {c : Char} (hc : c.utf8Size = 3) :
+    parseFirstByte ((String.utf8EncodeChar c)[0]'(by simp [Char.utf8Size_pos])) = .twoMore := by
+  rw [parseFirstByte_eq_twoMore_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_three hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_three {c : Char} (hc : c.utf8Size = 3) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[1]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_three hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_three {c : Char} (hc : c.utf8Size = 3) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[2]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_three hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem parseFirstByte_utf8EncodeChar_eq_threeMore {c : Char} (hc : c.utf8Size = 4) :
+    parseFirstByte ((String.utf8EncodeChar c)[0]'(by simp [Char.utf8Size_pos])) = .threeMore := by
+  rw [parseFirstByte_eq_threeMore_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_four hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_four {c : Char} (hc : c.utf8Size = 4) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[1]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_four hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_four {c : Char} (hc : c.utf8Size = 4) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[2]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_four hc,
+    BitVec.extractLsb'_append_eq_left]
+
+theorem isInvalidContinuationByte_getElem_utf8EncodeChar_three_of_utf8Size_eq_four {c : Char} (hc : c.utf8Size = 4) :
+    isInvalidContinuationByte ((String.utf8EncodeChar c)[3]'(by simp [String.length_utf8EncodeChar, hc])) = false := by
+  rw [isInvalidContinuationByte_eq_false_iff_toBitVec, String.toBitVec_getElem_utf8EncodeChar_three_of_utf8Size_eq_four hc,
+    BitVec.extractLsb'_append_eq_left]
+
+/-! # `assemble₁` -/
+
+theorem helper₅ {w : UInt8} (h : parseFirstByte w = .done) : w < 128 := by
+  simp only [UInt8.lt_iff_toBitVec_lt, UInt8.toBitVec_ofNat]
+  rw [toBitVec_eq_of_parseFirstByte_eq_done h]
+  simp only [Nat.reduceAdd, BitVec.lt_def, BitVec.toNat_ofNat, Nat.reducePow, Nat.reduceMod]
+  rw [BitVec.toNat_append]
+  simpa using Nat.mod_lt _ (by decide)
+
+@[inline, expose]
+public def assemble₁ (w : UInt8) (h : parseFirstByte w = .done) : Option Char :=
+  some ⟨w.toUInt32, ?done⟩
+where finally
+  case done =>
+    have : w < 0x80 := helper₅ h
+    refine Or.inl ?_
+    simp only [UInt8.lt_iff_toNat_lt, UInt8.reduceToNat] at this
+    simp only [UInt8.toNat_toUInt32]
+    omega
+
+theorem toBitVec_val_assemble₁ {w : UInt8} {h} {c : Char} : assemble₁ w h = some c → c.val.toBitVec = w.toBitVec.setWidth 32 := by
+  simp only [assemble₁, Option.some.injEq]
+  rintro rfl
+  rfl
+
+theorem val_assemble₁_le {w : UInt8} {h} {c : Char} (h' : assemble₁ w h = some c) : c.val ≤ 127 := by
+  have := toBitVec_val_assemble₁ h'
+  have hx := helper₅ h
+  simp only [UInt8.lt_iff_toNat_lt, UInt8.reduceToNat] at hx
+  rw [UInt32.le_iff_toBitVec_le, this, BitVec.le_def]
+  simp only [BitVec.toNat_setWidth, UInt8.toNat_toBitVec, Nat.reducePow, UInt8.toNat_mod_uInt32Size,
+    UInt32.toBitVec_ofNat, BitVec.toNat_ofNat, Nat.reduceMod]
+  omega
+
+theorem utf8Size_assemble₁ {w : UInt8} {c : Char} {h} (h : assemble₁ w h = some c) : c.utf8Size = 1 :=
+  Char.utf8Size_eq_one_iff.2 (val_assemble₁_le h)
+
+theorem assemble₁_eq_some_iff_utf8EncodeChar_eq {w : UInt8} {c : Char} :
+    (∃ h : parseFirstByte w = .done, assemble₁ w h = some c) ↔ String.utf8EncodeChar c = [w] := by
+  refine ⟨fun ⟨h₁, h₂⟩ => ?_, ?_⟩
+  · have hc := utf8Size_assemble₁ h₂
+    simp only [String.utf8EncodeChar_eq_singleton hc, List.cons.injEq, and_true]
+    simp only [assemble₁, Option.some.injEq] at h₂
+    simpa using congrArg (·.val.toUInt8) h₂ |>.symm
+  · intro h
+    have h₀ : (String.utf8EncodeChar c).length = 1 := congrArg List.length h
+    have hc : c.utf8Size = 1 := String.length_utf8EncodeChar _ ▸ h₀
+    obtain ⟨rfl⟩ : w = (String.utf8EncodeChar c)[0] := by simp [h]
+    refine ⟨parseFirstByte_utf8EncodeChar_eq_done hc, ?_⟩
+    have : c.val.toNat < 256 := by
+      simp only [Char.utf8Size_eq_one_iff, UInt32.le_iff_toNat_le, UInt32.reduceToNat] at hc
+      omega
+    simpa [String.utf8EncodeChar_eq_singleton hc, assemble₁, Char.ext_iff, ← UInt32.toNat_inj]
+
+/-! # `assemble₂` -/
+
+@[inline, expose]
+public def assemble₂Unchecked (w x : UInt8) : UInt32 :=
+  let b₀ : UInt8 := w &&& 0x1f
+  let b₁ := x &&& 0x3f
+  (b₀.toUInt32 <<< 6) ||| b₁.toUInt32
+
+@[inline, expose]
+public def assemble₂ (w x : UInt8) : Option Char :=
+  if isInvalidContinuationByte x then
+    none
+  else
+    let r := assemble₂Unchecked w x
+    if r < 0x80 then
+      none -- overlong encodinlg
+    else
+      some ⟨r, ?onemore⟩
+where finally
+  case onemore =>
+    let b₀ : UInt8 := w &&& 0x1f
+    let b₁ := x &&& 0x3f
+    have hr : r = (b₀.toUInt32 <<< 6) ||| b₁.toUInt32 := rfl
+    have hb₀ : b₀ < 0x20 := UInt8.and_lt_add_one (by decide)
+    have hb₁ : b₁ < 0x40 := UInt8.and_lt_add_one (by decide)
+    refine Or.inl ?_
+    simp only [UInt8.lt_iff_toNat_lt, UInt8.reduceToNat] at hb₀ hb₁
+    simp only [hr, UInt32.toNat_or, UInt32.toNat_shiftLeft, UInt8.toNat_toUInt32, UInt32.reduceToNat,
+      Nat.reduceMod, Nat.reducePow]
+    rw [Nat.shiftLeft_eq, Nat.mod_eq_of_lt (by omega), Nat.mul_comm, ← Nat.two_pow_add_eq_or_of_lt hb₁]
+    omega
+
+theorem helper₃ {x : UInt8} (n : Nat) (hn : n < 8) :
+    (x &&& UInt8.ofNat (2 ^ n - 1)).toUInt32.toBitVec = (x.toBitVec.setWidth n).setWidth 32 := by
+  apply BitVec.eq_of_toNat_eq
+  simp only [UInt8.toUInt32_and, UInt32.toBitVec_and, UInt8.toBitVec_toUInt32, BitVec.toNat_and,
+    BitVec.toNat_setWidth, UInt8.toNat_toBitVec, Nat.reducePow, UInt8.toNat_mod_uInt32Size,
+    UInt8.toNat_ofNat']
+  have : 2 ^ n < 2 ^ 8 := Nat.pow_lt_pow_right (by decide) hn
+  rw [Nat.mod_mod_of_dvd' (by decide), Nat.mod_eq_of_lt (by omega), Nat.mod_mod_of_dvd']
+  · exact Nat.and_two_pow_sub_one_eq_mod _ _
+  · exact Nat.pow_dvd_pow (n := 32) 2 (by omega)
+
+theorem toBitVec_assemble₂Unchecked {w x : UInt8} : (assemble₂Unchecked w x).toBitVec = (w.toBitVec.setWidth 5 ++ x.toBitVec.setWidth 6).setWidth 32 := by
+  have hw : (w &&& 31).toUInt32.toBitVec = (w.toBitVec.setWidth 5).setWidth 32 := helper₃ 5 (by decide)
+  have hx : (x &&& 63).toUInt32.toBitVec = (x.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  rw [assemble₂Unchecked, UInt32.toBitVec_or, UInt32.toBitVec_shiftLeft, hw, hx]
+  simpa using BitVec.setWidth_append_eq_shiftLeft_setWidth_or.symm
+
+theorem toBitVec_val_assemble₂ {w x : UInt8} (c : Char) : assemble₂ w x = some c → c.val.toBitVec = (w.toBitVec.setWidth 5 ++ x.toBitVec.setWidth 6).setWidth 32 := by
+  fun_cases assemble₂ with
+  | case1 => simp
+  | case2 => simp
+  | case3 hx r hr =>
+    simp only [Option.some.injEq, Nat.reduceAdd]
+    rintro rfl
+    subst r
+    simp [toBitVec_assemble₂Unchecked]
+
+theorem le_val_assemble₂ {w x : UInt8} {c : Char} : assemble₂ w x = some c → 128 ≤ c.val := by
+  rcases c with ⟨v, hv⟩
+  fun_cases assemble₂ with
+  | case1 => simp
+  | case2 => simp
+  | case3 hx r hr =>
+    simp only [Option.some.injEq, Char.mk.injEq]
+    rintro rfl
+    exact UInt32.not_lt.1 hr
+
+theorem val_assemble₂_le {w x : UInt8} {c : Char} (h : assemble₂ w x = some c) : c.val ≤ 2047 := by
+  rw [UInt32.le_iff_toBitVec_le, toBitVec_val_assemble₂ _ h, BitVec.le_def, BitVec.toNat_setWidth_of_le (by simp)]
+  exact Nat.le_of_lt_succ (BitVec.toNat_lt_twoPow_of_le (Nat.le_refl _))
+
+theorem utf8Size_assemble₂ {w x : UInt8} {c : Char} (h : assemble₂ w x = some c) : c.utf8Size = 2 :=
+  Char.utf8Size_eq_two_iff.2 ⟨le_val_assemble₂ h, val_assemble₂_le h⟩
+
+theorem helper₆ {c : Char} {o : Option Char} {b : BitVec 32} (hc : c.val.toBitVec = b) (h' : ∀ d, o = some d → d.val.toBitVec = b)
+    (hi : o.isSome) : o = some c := by
+  obtain rfl : c = o.get hi := by
+    refine Char.ext (UInt32.eq_of_toBitVec_eq ?_)
+    rw [hc, h' (o.get hi) (by simp)]
+  simp
+
+theorem assemble₂_eq_some_of_toBitVec {w x : UInt8} (hx : isInvalidContinuationByte x = false) {c : Char} (hc₀ : 128 ≤ c.val)
+    (hc : c.val.toBitVec = (w.toBitVec.setWidth 5 ++ x.toBitVec.setWidth 6).setWidth 32) : assemble₂ w x = some c := by
+  apply helper₆ hc toBitVec_val_assemble₂
+  fun_cases assemble₂ with
+  | case1 => simp_all
+  | case2 hx r hr =>
+    suffices 128 ≤ r.toNat ∧ r.toNat < 128 by omega
+    simp_all [← toBitVec_assemble₂Unchecked, r, UInt32.le_iff_toBitVec_le, UInt32.lt_iff_toBitVec_lt, BitVec.le_def, BitVec.lt_def]
+  | case3 hx r hr => simp
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₂_eq_some {w x : UInt8} {c : Char} : assemble₂ w x = some c → isInvalidContinuationByte x = false := by
+  fun_cases assemble₂ with simp_all
+
+theorem assemble₂_eq_some_iff_utf8EncodeChar_eq {x y : UInt8} {c : Char} :
+    parseFirstByte x = .oneMore ∧ assemble₂ x y = some c ↔ String.utf8EncodeChar c = [x, y] := by
+  refine ⟨fun ⟨h₁, h₂⟩ => ?_, ?_⟩
+  · have hc := utf8Size_assemble₂ h₂
+    rw [(String.utf8EncodeChar c).eq_getElem_of_length_eq_two (by simp [hc])]
+    simp only [List.cons.injEq, and_true, UInt8.eq_iff_toBitVec_eq]
+    refine ⟨?_, ?_⟩
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_two hc,
+        toBitVec_val_assemble₂ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_left]
+      conv => rhs; rw [toBitVec_eq_of_parseFirstByte_eq_oneMore h₁]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_two hc,
+        toBitVec_val_assemble₂ _ h₂]
+      rw [BitVec.extractLsb'_setWidth_of_le (by decide)]
+      conv => rhs; rw [toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₂_eq_some h₂)]
+      rw [BitVec.extractLsb'_append_eq_right]
+  · intro h
+    have hc : c.utf8Size = 2 := String.length_utf8EncodeChar _ ▸ congrArg List.length h
+    have h₀ : (String.utf8EncodeChar c).length = 2 := congrArg List.length h
+    obtain ⟨rfl, rfl⟩ : x = (String.utf8EncodeChar c)[0] ∧ y = (String.utf8EncodeChar c)[1] := by simp [h]
+    refine ⟨parseFirstByte_utf8EncodeChar_eq_oneMore hc, ?_⟩
+    apply assemble₂_eq_some_of_toBitVec
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_two hc
+    · rw [Char.utf8Size_eq_two_iff] at hc
+      exact hc.1
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_two hc,
+        String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_two hc,
+        BitVec.setWidth_append_eq_right, BitVec.setWidth_append_eq_right,
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        ← BitVec.setWidth_eq_extractLsb' (by simp),
+        BitVec.setWidth_setWidth_eq_self]
+      simpa [BitVec.lt_def, UInt32.le_iff_toNat_le] using Nat.lt_succ_iff.2 (Char.utf8Size_eq_two_iff.1 hc).2
+
+/-! # `assemble₃` -/
+
+@[inline, expose]
+public def assemble₃Unchecked (w x y : UInt8) : UInt32 :=
+  let b₀ : UInt8 := w &&& 0x0f
+  let b₁ := x &&& 0x3f
+  let b₂ := y &&& 0x3f
+  (b₀.toUInt32 <<< 12) ||| (b₁.toUInt32 <<< 6) ||| b₂.toUInt32
+
+@[inline, expose]
+public def assemble₃ (w x y : UInt8) : Option Char :=
+  if isInvalidContinuationByte x || isInvalidContinuationByte y then
+    none
+  else
+    let r := assemble₃Unchecked w x y
+    if r < 0x800 then
+      none -- overlong encoding
+    else if hr : 0xd800 ≤ r ∧ r ≤ 0xdfff then
+      none -- surrogate code point
+    else
+      some ⟨r, ?twomore⟩
+where finally
+  case twomore =>
+    let b₀ : UInt8 := w &&& 0x0f
+    let b₁ := x &&& 0x3f
+    let b₂ := y &&& 0x3f
+    have hb₀ : b₀ < 0x10 := UInt8.and_lt_add_one (by decide)
+    have hb₁ : b₁ < 0x40 := UInt8.and_lt_add_one (by decide)
+    have hb₂ : b₂ < 0x40 := UInt8.and_lt_add_one (by decide)
+    have hr' : r = (b₀.toUInt32 <<< 12) ||| (b₁.toUInt32 <<< 6) ||| b₂.toUInt32 := rfl
+    simp only [UInt8.lt_iff_toNat_lt, UInt8.reduceToNat] at hb₀ hb₁ hb₂
+    simp only [Decidable.not_and_iff_not_or_not, UInt32.not_le] at hr
+    rcases hr with (hr|hr)
+    · exact Or.inl hr
+    · refine Or.inr ⟨hr, ?_⟩
+      subst r
+      simp only [hr', UInt32.toNat_or, UInt32.toNat_shiftLeft, UInt8.toNat_toUInt32, UInt32.reduceToNat,
+        Nat.reduceMod, Nat.reducePow]
+      rw [Nat.shiftLeft_eq, Nat.shiftLeft_eq, Nat.mod_eq_of_lt (by omega), Nat.mod_eq_of_lt (by omega),
+        Nat.mul_comm _ (2 ^ _), Nat.mul_comm _ (2 ^ _), Nat.or_assoc, ← Nat.two_pow_add_eq_or_of_lt (b := b₂.toNat) hb₂,
+          ← Nat.two_pow_add_eq_or_of_lt (by omega)]
+      omega
+
+theorem toBitVec_assemble₃Unchecked {w x y : UInt8} : (assemble₃Unchecked w x y).toBitVec =
+    (w.toBitVec.setWidth 4 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6).setWidth 32 := by
+  have hw : (w &&& 15).toUInt32.toBitVec = (w.toBitVec.setWidth 4).setWidth 32 := helper₃ 4 (by decide)
+  have hx : (x &&& 63).toUInt32.toBitVec = (x.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  have hy : (y &&& 63).toUInt32.toBitVec = (y.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  rw [assemble₃Unchecked, UInt32.toBitVec_or, UInt32.toBitVec_or, UInt32.toBitVec_shiftLeft, UInt32.toBitVec_shiftLeft,
+    hw, hx, hy, BitVec.setWidth_append_append_eq_shiftLeft_setWidth_or]
+  simp
+
+theorem toBitVec_val_assemble₃ {w x y : UInt8} (c : Char) : assemble₃ w x y = some c →
+    c.val.toBitVec = (w.toBitVec.setWidth 4 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6).setWidth 32 := by
+  fun_cases assemble₃ with
+  | case1 => simp
+  | case2 => simp
+  | case3 => simp
+  | case4 hx r hr =>
+    simp only [Option.some.injEq, Nat.reduceAdd]
+    rintro rfl
+    subst r
+    simp [toBitVec_assemble₃Unchecked]
+
+theorem le_val_assemble₃ {w x y : UInt8} {c : Char} : assemble₃ w x y = some c → 0x800 ≤ c.val := by
+  rcases c with ⟨v, hv⟩
+  fun_cases assemble₃ with
+  | case1 => simp
+  | case2 => simp
+  | case3 => simp
+  | case4 hx r hr =>
+    simp only [Option.some.injEq, Char.mk.injEq]
+    rintro rfl
+    exact UInt32.not_lt.1 hr
+
+theorem val_assemble₃_le {w x y : UInt8} {c : Char} (h : assemble₃ w x y = some c) : c.val ≤ 0xffff := by
+  rw [UInt32.le_iff_toBitVec_le, toBitVec_val_assemble₃ _ h, BitVec.le_def, BitVec.toNat_setWidth_of_le (by simp)]
+  exact Nat.le_of_lt_succ (BitVec.toNat_lt_twoPow_of_le (Nat.le_refl _))
+
+theorem utf8Size_assemble₃ {w x y : UInt8} {c : Char} (h : assemble₃ w x y = some c) : c.utf8Size = 3 :=
+  Char.utf8Size_eq_three_iff.2 ⟨le_val_assemble₃ h, val_assemble₃_le h⟩
+
+theorem assemble₃_eq_some_of_toBitVec {w x y : UInt8} (hx : isInvalidContinuationByte x = false)
+    (hy : isInvalidContinuationByte y = false) {c : Char} (hc₀ : 0x800 ≤ c.val)
+    (hc : c.val.toBitVec = (w.toBitVec.setWidth 4 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6).setWidth 32) :
+    assemble₃ w x y = some c := by
+  apply helper₆ hc toBitVec_val_assemble₃
+  fun_cases assemble₃ with
+  | case1 => simp_all
+  | case2 hx r hr =>
+    suffices 0x800 ≤ (assemble₃Unchecked w x y).toNat ∧ (assemble₃Unchecked w x y).toNat < 0x800 by omega
+    simp_all [← toBitVec_assemble₃Unchecked, r, UInt32.le_iff_toBitVec_le, UInt32.lt_iff_toBitVec_lt, BitVec.le_def, BitVec.lt_def]
+  | case3 hx r hr hr' =>
+    have hcv := c.valid
+    suffices ((55296 ≤ r.toNat ∧ r.toNat ≤ 57343) ∧ (r.toNat < 55296 ∨ (57343 < r.toNat ∧ r.toNat < 1114112))) by omega
+    simp_all [UInt32.isValidChar, Nat.isValidChar, ← toBitVec_assemble₃Unchecked, r, UInt32.le_iff_toBitVec_le, UInt32.lt_iff_toBitVec_lt,
+      BitVec.le_def, BitVec.lt_def, ← BitVec.toNat_inj]
+  | case4 => simp
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₃_eq_some_left {w x y : UInt8} {c : Char} : assemble₃ w x y = some c → isInvalidContinuationByte x = false := by
+  fun_cases assemble₃ with simp_all
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₃_eq_some_right {w x y : UInt8} {c : Char} : assemble₃ w x y = some c → isInvalidContinuationByte y = false := by
+  fun_cases assemble₃ with simp_all
+
+theorem assemble₃_eq_some_iff_utf8EncodeChar_eq {w x y : UInt8} {c : Char} :
+    parseFirstByte w = .twoMore ∧ assemble₃ w x y = some c ↔ String.utf8EncodeChar c = [w, x, y] := by
+  refine ⟨fun ⟨h₁, h₂⟩ => ?_, ?_⟩
+  · have hc := utf8Size_assemble₃ h₂
+    rw [(String.utf8EncodeChar c).eq_getElem_of_length_eq_three (by simp [hc])]
+    simp only [List.cons.injEq, UInt8.eq_iff_toBitVec_eq, and_true]
+    refine ⟨?_, ?_, ?_⟩
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_three hc,
+        toBitVec_val_assemble₃ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.append_assoc, BitVec.cast_eq, BitVec.extractLsb'_append_eq_left,
+        ← toBitVec_eq_of_parseFirstByte_eq_twoMore h₁]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_three hc,
+        toBitVec_val_assemble₃ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_of_le (by decide), BitVec.extractLsb'_append_eq_right,
+        ← toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₃_eq_some_left h₂)]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_three hc,
+        toBitVec_val_assemble₃ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_right,
+        ← toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₃_eq_some_right h₂)]
+  · intro h
+    have h₀ : (String.utf8EncodeChar c).length = 3 := congrArg List.length h
+    have hc : c.utf8Size = 3 := String.length_utf8EncodeChar _ ▸ h₀
+    obtain ⟨rfl, rfl, rfl⟩ : w = (String.utf8EncodeChar c)[0] ∧ x = (String.utf8EncodeChar c)[1] ∧
+      y = (String.utf8EncodeChar c)[2] := by simp [h]
+    refine ⟨parseFirstByte_utf8EncodeChar_eq_twoMore hc, ?_⟩
+    apply assemble₃_eq_some_of_toBitVec
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_three hc
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_three hc
+    · simp [Char.utf8Size_eq_three_iff, UInt32.lt_iff_toNat_lt] at hc
+      exact UInt32.le_iff_toNat_le.2 (by simp; omega)
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_three hc,
+        String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_three hc,
+        String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_three hc,
+        BitVec.setWidth_append_eq_right, BitVec.setWidth_append_eq_right, BitVec.setWidth_append_eq_right,
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        ← BitVec.setWidth_eq_extractLsb' (by simp), BitVec.setWidth_setWidth_eq_self]
+      simpa [BitVec.lt_def, UInt32.le_iff_toNat_le] using Nat.lt_succ_iff.2 (Char.utf8Size_eq_three_iff.1 hc).2
+
+/-! # `assemble₄` -/
+
+@[inline, expose]
+public def assemble₄Unchecked (w x y z : UInt8) : UInt32 :=
+  let b₀ : UInt8 := w &&& 0x07
+  let b₁ := x &&& 0x3f
+  let b₂ := y &&& 0x3f
+  let b₃ := z &&& 0x3f
+  (b₀.toUInt32 <<< 18) ||| (b₁.toUInt32 <<< 12) ||| (b₂.toUInt32 <<< 6) ||| b₃.toUInt32
+
+@[inline, expose]
+public def assemble₄ (w x y z : UInt8) : Option Char :=
+  if isInvalidContinuationByte x || isInvalidContinuationByte y || isInvalidContinuationByte z then
+    none
+  else
+    let r := assemble₄Unchecked w x y z
+    if h₁ : r < 0x10000 then
+      none -- overlong encoding
+    else if h₂ : 0x10ffff < r then
+      none -- out-of-range code point
+    else
+      some ⟨r, ?threemore⟩
+where finally
+  case threemore =>
+    simp only [UInt32.not_lt, UInt32.le_iff_toNat_le, UInt32.reduceToNat] at h₁ h₂
+    exact Or.inr ⟨by omega, by omega⟩
+
+theorem toBitVec_assemble₄Unchecked {w x y z : UInt8} : (assemble₄Unchecked w x y z).toBitVec =
+    (w.toBitVec.setWidth 3 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6 ++ z.toBitVec.setWidth 6).setWidth 32 := by
+  have hw : (w &&& 7).toUInt32.toBitVec = (w.toBitVec.setWidth 3).setWidth 32 := helper₃ 3 (by decide)
+  have hx : (x &&& 63).toUInt32.toBitVec = (x.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  have hy : (y &&& 63).toUInt32.toBitVec = (y.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  have hz : (z &&& 63).toUInt32.toBitVec = (z.toBitVec.setWidth 6).setWidth 32 := helper₃ 6 (by decide)
+  rw [assemble₄Unchecked, UInt32.toBitVec_or, UInt32.toBitVec_or, UInt32.toBitVec_or, UInt32.toBitVec_shiftLeft,
+    UInt32.toBitVec_shiftLeft, UInt32.toBitVec_shiftLeft, hw, hx, hy, hz, BitVec.setWidth_append_append_append_eq_shiftLeft_setWidth_or]
+  simp
+
+theorem toBitVec_val_assemble₄ {w x y z : UInt8} (c : Char) : assemble₄ w x y z = some c →
+    c.val.toBitVec = (w.toBitVec.setWidth 3 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6 ++ z.toBitVec.setWidth 6).setWidth 32 := by
+  fun_cases assemble₄ with
+  | case1 => simp
+  | case2 => simp
+  | case3 => simp
+  | case4 hx r hr =>
+    simp only [Option.some.injEq, Nat.reduceAdd]
+    rintro rfl
+    subst r
+    simp [toBitVec_assemble₄Unchecked]
+
+theorem le_val_assemble₄ {w x y z : UInt8} {c : Char} : assemble₄ w x y z = some c → 0xffff < c.val := by
+  rcases c with ⟨v, hv⟩
+  fun_cases assemble₄ with
+  | case1 => simp
+  | case2 => simp
+  | case3 => simp
+  | case4 hx r hr =>
+    simp only [Option.some.injEq, Char.mk.injEq]
+    rintro rfl
+    have := UInt32.not_lt.1 hr
+    simp only [UInt32.le_iff_toNat_le, UInt32.reduceToNat] at this
+    simp only [UInt32.lt_iff_toNat_lt, UInt32.reduceToNat, gt_iff_lt]
+    omega
+
+theorem utf8Size_assemble₄ {w x y z : UInt8} {c : Char} (h : assemble₄ w x y z = some c) : c.utf8Size = 4 :=
+  Char.utf8Size_eq_four_iff.2 (le_val_assemble₄ h)
+
+theorem assemble₄_eq_some_of_toBitVec {w x y z : UInt8} (hx : isInvalidContinuationByte x = false)
+    (hy : isInvalidContinuationByte y = false) (hz : isInvalidContinuationByte z = false) {c : Char} (hc₀ : 0x10000 ≤ c.val)
+    (hc : c.val.toBitVec = (w.toBitVec.setWidth 3 ++ x.toBitVec.setWidth 6 ++ y.toBitVec.setWidth 6 ++ z.toBitVec.setWidth 6).setWidth 32) :
+    assemble₄ w x y z = some c := by
+  apply helper₆ hc toBitVec_val_assemble₄
+  fun_cases assemble₄ with
+  | case1 => simp_all
+  | case2 hx r hr =>
+    suffices 0x10000 ≤ (assemble₄Unchecked w x y z).toNat ∧ (assemble₄Unchecked w x y z).toNat < 0x10000 by omega
+    simp_all [← toBitVec_assemble₄Unchecked, r, UInt32.le_iff_toBitVec_le, UInt32.lt_iff_toBitVec_lt, BitVec.le_def, BitVec.lt_def]
+  | case3 hx r hr hr' =>
+    have hcv := c.valid
+    suffices (1114111 < r.toNat ∧ (r.toNat < 55296 ∨ (57343 < r.toNat ∧ r.toNat < 1114112))) by omega
+    simp_all [UInt32.isValidChar, Nat.isValidChar, ← toBitVec_assemble₄Unchecked, r, UInt32.le_iff_toBitVec_le, UInt32.lt_iff_toBitVec_lt,
+      BitVec.le_def, BitVec.lt_def, ← BitVec.toNat_inj]
+  | case4 => simp
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_left {w x y z : UInt8} {c : Char} : assemble₄ w x y z = some c → isInvalidContinuationByte x = false := by
+  fun_cases assemble₄ with simp_all
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_middle {w x y z : UInt8} {c : Char} : assemble₄ w x y z = some c → isInvalidContinuationByte y = false := by
+  fun_cases assemble₄ with simp_all
+
+theorem isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_right {w x y z : UInt8} {c : Char} : assemble₄ w x y z = some c → isInvalidContinuationByte z = false := by
+  fun_cases assemble₄ with simp_all
+
+theorem assemble₄_eq_some_iff_utf8EncodeChar_eq {w x y z : UInt8} {c : Char} :
+    parseFirstByte w = .threeMore ∧ assemble₄ w x y z = some c ↔ String.utf8EncodeChar c = [w, x, y, z] := by
+  refine ⟨fun ⟨h₁, h₂⟩ => ?_, ?_⟩
+  · have hc := utf8Size_assemble₄ h₂
+    rw [(String.utf8EncodeChar c).eq_getElem_of_length_eq_four (by simp [hc])]
+    simp only [List.cons.injEq, UInt8.eq_iff_toBitVec_eq, and_true]
+    refine ⟨?_, ?_, ?_, ?_⟩
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_four hc,
+        toBitVec_val_assemble₄ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.append_assoc, BitVec.cast_eq, BitVec.append_assoc, BitVec.cast_eq,
+        BitVec.extractLsb'_append_eq_left,
+        ← toBitVec_eq_of_parseFirstByte_eq_threeMore h₁]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_four hc,
+        toBitVec_val_assemble₄ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_of_le (by decide), BitVec.extractLsb'_append_eq_of_le (by decide),
+        BitVec.extractLsb'_append_eq_right,
+        ← toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_left h₂)]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_four hc,
+        toBitVec_val_assemble₄ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_of_le (by decide), BitVec.extractLsb'_append_eq_right,
+        ← toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_middle h₂)]
+    · rw [String.toBitVec_getElem_utf8EncodeChar_three_of_utf8Size_eq_four hc,
+        toBitVec_val_assemble₄ _ h₂, BitVec.extractLsb'_setWidth_of_le (by decide),
+        BitVec.extractLsb'_append_eq_right,
+        ← toBitVec_eq_of_isInvalidContinuationByte_eq_false (isInvalidContinuationByte_eq_false_of_assemble₄_eq_some_right h₂)]
+  · intro h
+    have h₀ : (String.utf8EncodeChar c).length = 4 := congrArg List.length h
+    have hc : c.utf8Size = 4 :=  String.length_utf8EncodeChar _ ▸ h₀
+    obtain ⟨rfl, rfl, rfl, rfl⟩ :
+      w = (String.utf8EncodeChar c)[0] ∧ x = (String.utf8EncodeChar c)[1] ∧ y = (String.utf8EncodeChar c)[2]
+        ∧ z = (String.utf8EncodeChar c)[3] := by simp [h]
+    refine ⟨parseFirstByte_utf8EncodeChar_eq_threeMore hc, ?_⟩
+    apply assemble₄_eq_some_of_toBitVec
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_one_of_utf8Size_eq_four hc
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_four hc
+    · apply isInvalidContinuationByte_getElem_utf8EncodeChar_three_of_utf8Size_eq_four hc
+    · simp [Char.utf8Size_eq_four_iff, UInt32.lt_iff_toNat_lt] at hc
+      exact UInt32.le_iff_toNat_le.2 (by simp; omega)
+    · rw [String.toBitVec_getElem_utf8EncodeChar_zero_of_utf8Size_eq_four hc,
+        String.toBitVec_getElem_utf8EncodeChar_one_of_utf8Size_eq_four hc,
+        String.toBitVec_getElem_utf8EncodeChar_two_of_utf8Size_eq_four hc,
+        String.toBitVec_getElem_utf8EncodeChar_three_of_utf8Size_eq_four hc,
+        BitVec.setWidth_append_eq_right, BitVec.setWidth_append_eq_right, BitVec.setWidth_append_eq_right,
+        BitVec.setWidth_append_eq_right,
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        BitVec.extractLsb'_append_extractLsb'_eq_extractLsb' (by simp),
+        ← BitVec.setWidth_eq_extractLsb' (by simp), BitVec.setWidth_setWidth_eq_self]
+      have := c.toNat_val_le
+      simp only [Nat.reduceAdd, BitVec.lt_def, UInt32.toNat_toBitVec, BitVec.toNat_twoPow,
+        Nat.reducePow, Nat.reduceMod, gt_iff_lt]
+      omega
+
+end ByteArray.utf8DecodeChar?
+
+open ByteArray.utf8DecodeChar?
+
+/- # `utf8DecodeChar?` -/
+
+@[inline, expose]
+public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char :=
+  if h₀ : i < bytes.size then
+    match h : parseFirstByte bytes[i] with
+    | .invalid => none -- invalid first byte
+    | .done => assemble₁ bytes[i] h
+    | .oneMore =>
+      if h₁ : i + 1 < bytes.size then
+        assemble₂ bytes[i] bytes[i + 1]
+      else
+        none
+    | .twoMore =>
+      if h₁ : i + 2 < bytes.size then
+        assemble₃ bytes[i] bytes[i + 1] bytes[i + 2]
+      else none
+    | .threeMore =>
+      if h₁ : i + 3 < bytes.size then
+        assemble₄ bytes[i] bytes[i + 1] bytes[i + 2] bytes[i + 3]
+      else none
+  else none
+
+/-! # `utf8DecodeChar?` low-level API -/
+
+theorem parseFirstByte_eq_done_of_utf8DecodeChar?_eq_some {b : ByteArray} {i : Nat} {c : Char}
+    (h : b.utf8DecodeChar? i = some c) (hc : c.utf8Size = 1) (h') :
+    parseFirstByte (b[i]'h') = .done := by
+  revert h
+  fun_cases ByteArray.utf8DecodeChar? with
+  | case1 => simp
+  | case2 => simp_all
+  | case3 => exact (by omega) ∘ utf8Size_assemble₂
+  | case4 => simp
+  | case5 => exact (by omega) ∘ utf8Size_assemble₃
+  | case6 => simp
+  | case7 => exact (by omega) ∘ utf8Size_assemble₄
+  | case8 => simp
+  | case9 => simp
+
+theorem parseFirstByte_eq_oneMore_of_utf8DecodeChar?_eq_some {b : ByteArray} {i : Nat} {c : Char}
+    (h : b.utf8DecodeChar? i = some c) (hc : c.utf8Size = 2) (h') :
+    parseFirstByte (b[i]'h') = .oneMore := by
+  revert h
+  fun_cases ByteArray.utf8DecodeChar? with
+  | case1 => simp
+  | case2 => exact (by omega) ∘ utf8Size_assemble₁
+  | case3 => simp_all
+  | case4 => simp
+  | case5 => exact (by omega) ∘ utf8Size_assemble₃
+  | case6 => simp
+  | case7 => exact (by omega) ∘ utf8Size_assemble₄
+  | case8 => simp
+  | case9 => simp
+
+theorem parseFirstByte_eq_twoMore_of_utf8DecodeChar?_eq_some {b : ByteArray} {i : Nat} {c : Char}
+    (h : b.utf8DecodeChar? i = some c) (hc : c.utf8Size = 3) (h') :
+    parseFirstByte (b[i]'h') = .twoMore := by
+  revert h
+  fun_cases ByteArray.utf8DecodeChar? with
+  | case1 => simp
+  | case2 => exact (by omega) ∘ utf8Size_assemble₁
+  | case3 => exact (by omega) ∘ utf8Size_assemble₂
+  | case4 => simp
+  | case5 => simp_all
+  | case6 => simp
+  | case7 => exact (by omega) ∘ utf8Size_assemble₄
+  | case8 => simp
+  | case9 => simp
+
+theorem parseFirstByte_eq_threeMore_of_utf8DecodeChar?_eq_some {b : ByteArray} {i : Nat} {c : Char}
+    (h : b.utf8DecodeChar? i = some c) (hc : c.utf8Size = 4) (h') :
+    parseFirstByte (b[i]'h') = .threeMore := by
+  revert h
+  fun_cases ByteArray.utf8DecodeChar? with
+  | case1 => simp
+  | case2 => exact (by omega) ∘ utf8Size_assemble₁
+  | case3 => exact (by omega) ∘ utf8Size_assemble₂
+  | case4 => simp
+  | case5 => exact (by omega) ∘ utf8Size_assemble₃
+  | case6 => simp
+  | case7 => simp_all
+  | case8 => simp
+  | case9 => simp
+
+theorem utf8Size_le_of_utf8DecodeChar?_eq_some {b : ByteArray} {c : Char} :
+    b.utf8DecodeChar? 0 = some c → c.utf8Size ≤ b.size := by
+  fun_cases ByteArray.utf8DecodeChar? with
+  | case1 => simp
+  | case2 => exact (by omega) ∘ utf8Size_assemble₁
+  | case3 => exact (by omega) ∘ utf8Size_assemble₂
+  | case4 => simp
+  | case5 => exact (by omega) ∘ utf8Size_assemble₃
+  | case6 => simp
+  | case7 => exact (by omega) ∘ utf8Size_assemble₄
+  | case8 => simp
+  | case9 => simp
+
+theorem utf8DecodeChar?_eq_assemble₁ {b : ByteArray} (hb : 1 ≤ b.size) (h : parseFirstByte b[0] = .done) :
+    b.utf8DecodeChar? 0 = assemble₁ b[0] h := by
+  fun_cases ByteArray.utf8DecodeChar?
+  all_goals try (simp_all; done)
+  all_goals omega
+
+theorem utf8DecodeChar?_eq_assemble₂ {b : ByteArray} (hb : 2 ≤ b.size) (h : parseFirstByte b[0] = .oneMore) :
+    b.utf8DecodeChar? 0 = assemble₂ b[0] b[1] := by
+  fun_cases ByteArray.utf8DecodeChar?
+  all_goals try (simp_all; done)
+  all_goals omega
+
+theorem utf8DecodeChar?_eq_assemble₃ {b : ByteArray} (hb : 3 ≤ b.size) (h : parseFirstByte b[0] = .twoMore) :
+    b.utf8DecodeChar? 0 = assemble₃ b[0] b[1] b[2] := by
+  fun_cases ByteArray.utf8DecodeChar?
+  all_goals try (simp_all; done)
+  all_goals omega
+
+theorem utf8DecodeChar?_eq_assemble₄ {b : ByteArray} (hb : 4 ≤ b.size) (h : parseFirstByte b[0] = .threeMore) :
+    b.utf8DecodeChar? 0 = assemble₄ b[0] b[1] b[2] b[3] := by
+  fun_cases ByteArray.utf8DecodeChar?
+  all_goals try (simp_all; done)
+  all_goals omega
+
+theorem utf8DecodeChar?_append_eq_assemble₁ {l : List UInt8} {b : ByteArray} (hl : l.length = 1) (h : parseFirstByte l[0] = .done) :
+    (l.toByteArray ++ b).utf8DecodeChar? 0 = assemble₁ l[0] h := by
+  have : (l.toByteArray ++ b)[0]'(by simp [hl]; omega) = l[0] := by
+    rw [ByteArray.getElem_append_left (by simp [hl]), List.getElem_toByteArray]
+  rw [utf8DecodeChar?_eq_assemble₁ (by simp [hl])] <;> simp [this, h]
+
+theorem utf8DecodeChar?_append_eq_assemble₂ {l : List UInt8} {b : ByteArray} (hl : l.length = 2) (h : parseFirstByte l[0] = .oneMore) :
+    (l.toByteArray ++ b).utf8DecodeChar? 0 = assemble₂ l[0] l[1] := by
+  rw [utf8DecodeChar?_eq_assemble₂ (by simp [hl])]
+  all_goals repeat rw [ByteArray.getElem_append_left (by simp [hl])]
+  all_goals repeat rw [List.getElem_toByteArray]
+  assumption
+
+theorem utf8DecodeChar?_append_eq_assemble₃ {l : List UInt8} {b : ByteArray} (hl : l.length = 3) (h : parseFirstByte l[0] = .twoMore) :
+    (l.toByteArray ++ b).utf8DecodeChar? 0 = assemble₃ l[0] l[1] l[2] := by
+  rw [utf8DecodeChar?_eq_assemble₃ (by simp [hl])]
+  all_goals repeat rw [ByteArray.getElem_append_left (by simp [hl])]
+  all_goals repeat rw [List.getElem_toByteArray]
+  assumption
+
+theorem utf8DecodeChar?_append_eq_assemble₄ {l : List UInt8} {b : ByteArray} (hl : l.length = 4) (h : parseFirstByte l[0] = .threeMore) :
+    (l.toByteArray ++ b).utf8DecodeChar? 0 = assemble₄ l[0] l[1] l[2] l[3] := by
+  rw [utf8DecodeChar?_eq_assemble₄ (by simp [hl])]
+  all_goals repeat rw [ByteArray.getElem_append_left (by simp [hl])]
+  all_goals repeat rw [List.getElem_toByteArray]
+  assumption
+
+/-!
+# Main theorems
+
+`utf8DecodeChar?_utf8EncodeChar_append` and `toByteArray_of_utf8DecodeChar?_eq_some` are the two main results that together
+imply that UTF-8 encoding and decoding are inverse.
+-/
+
+public theorem ByteArray.utf8DecodeChar?_utf8EncodeChar_append {b : ByteArray} {c : Char} :
+    utf8DecodeChar? ((String.utf8EncodeChar c).toByteArray ++ b) 0 = some c := by
+  match hc : c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
+  | 1, _, _ =>
+    have hc' : (String.utf8EncodeChar c).length = 1 := String.length_utf8EncodeChar _ ▸ hc
+    rw [utf8DecodeChar?_append_eq_assemble₁ hc' (parseFirstByte_utf8EncodeChar_eq_done hc)]
+    exact (assemble₁_eq_some_iff_utf8EncodeChar_eq.2 (List.eq_getElem_of_length_eq_one _ hc')).2
+  | 2, _, _ =>
+    have hc' : (String.utf8EncodeChar c).length = 2 := String.length_utf8EncodeChar _ ▸ hc
+    rw [utf8DecodeChar?_append_eq_assemble₂ hc' (parseFirstByte_utf8EncodeChar_eq_oneMore hc)]
+    exact (assemble₂_eq_some_iff_utf8EncodeChar_eq.2 (List.eq_getElem_of_length_eq_two _ hc')).2
+  | 3, _, _ =>
+    have hc' : (String.utf8EncodeChar c).length = 3 := String.length_utf8EncodeChar _ ▸ hc
+    rw [utf8DecodeChar?_append_eq_assemble₃ hc' (parseFirstByte_utf8EncodeChar_eq_twoMore hc)]
+    exact (assemble₃_eq_some_iff_utf8EncodeChar_eq.2 (List.eq_getElem_of_length_eq_three _ hc')).2
+  | 4, _, _ =>
+    have hc' : (String.utf8EncodeChar c).length = 4 := String.length_utf8EncodeChar _ ▸ hc
+    rw [utf8DecodeChar?_append_eq_assemble₄ hc' (parseFirstByte_utf8EncodeChar_eq_threeMore hc)]
+    exact (assemble₄_eq_some_iff_utf8EncodeChar_eq.2 (List.eq_getElem_of_length_eq_four _ hc')).2
+
+public theorem String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some {b : ByteArray} {c : Char} (h : b.utf8DecodeChar? 0 = some c) :
+    (String.utf8EncodeChar c).toByteArray = b.extract 0 c.utf8Size := by
+  have := utf8Size_le_of_utf8DecodeChar?_eq_some h
+  match hc : c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with
+  | 1, _, _ =>
+    have := parseFirstByte_eq_done_of_utf8DecodeChar?_eq_some h hc (by omega)
+    rw [utf8DecodeChar?_eq_assemble₁ (by omega) this] at h
+    rw [ByteArray.extract_add_one (by omega)]
+    congr
+    rw [← assemble₁_eq_some_iff_utf8EncodeChar_eq]
+    exact ⟨this, h⟩
+  | 2, _, _ =>
+    have := parseFirstByte_eq_oneMore_of_utf8DecodeChar?_eq_some h hc (by omega)
+    rw [utf8DecodeChar?_eq_assemble₂ (by omega) this] at h
+    rw [ByteArray.extract_add_two (by omega)]
+    congr
+    rw [← assemble₂_eq_some_iff_utf8EncodeChar_eq]
+    exact ⟨this, h⟩
+  | 3, _, _ =>
+    have := parseFirstByte_eq_twoMore_of_utf8DecodeChar?_eq_some h hc (by omega)
+    rw [utf8DecodeChar?_eq_assemble₃ (by omega) this] at h
+    rw [ByteArray.extract_add_three (by omega)]
+    congr
+    rw [← assemble₃_eq_some_iff_utf8EncodeChar_eq]
+    exact ⟨this, h⟩
+  | 4, _, _ =>
+    have := parseFirstByte_eq_threeMore_of_utf8DecodeChar?_eq_some h hc (by omega)
+    rw [utf8DecodeChar?_eq_assemble₄ (by omega) this] at h
+    rw [ByteArray.extract_add_four (by omega)]
+    congr
+    rw [← assemble₄_eq_some_iff_utf8EncodeChar_eq]
+    exact ⟨this, h⟩
+
+/-! # Corollaries -/
+
+public theorem ByteArray.eq_of_utf8DecodeChar?_eq_some {b : ByteArray} {c : Char} (h : utf8DecodeChar? b 0 = some c) :
+    b = (String.utf8EncodeChar c).toByteArray ++ b.extract c.utf8Size b.size := by
+  rw [String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some h,
+    ByteArray.extract_append_extract, Nat.zero_min, Nat.max_eq_right (utf8Size_le_of_utf8DecodeChar?_eq_some h),
+    ByteArray.extract_zero_size]
+
+public theorem ByteArray.exists_of_utf8DecodeChar?_eq_some {b : ByteArray} {c : Char} (h : utf8DecodeChar? b 0 = some c) :
+    ∃ l, b = (String.utf8EncodeChar c).toByteArray ++ l :=
+  ⟨b.extract c.utf8Size b.size, eq_of_utf8DecodeChar?_eq_some h⟩
+
+public theorem ByteArray.utf8DecodeChar?_eq_utf8DecodeChar?_extract {b : ByteArray} {i : Nat} :
+    utf8DecodeChar? b i = utf8DecodeChar? (b.extract i b.size) 0 := by
+  simp [utf8DecodeChar?]
+  have h₁ : i < b.size ↔ 0 < b.size - i := by omega
+  have h₂ : i + 1 < b.size ↔ 1 < b.size - i := by omega
+  have h₃ : i + 2 < b.size ↔ 2 < b.size - i := by omega
+  have h₄ : i + 3 < b.size ↔ 3 < b.size - i := by omega
+  have h₅ : ∀ h, b[i]'h = (b.extract i b.size)[0]'(by simp; omega) := by simp [ByteArray.getElem_extract]
+  have h₆ : ∀ h, b[i + 1]'h = (b.extract i b.size)[1]'(by simp; omega) := by simp [ByteArray.getElem_extract]
+  have h₇ : ∀ h, b[i + 2]'h = (b.extract i b.size)[2]'(by simp; omega) := by simp [ByteArray.getElem_extract]
+  have h₈ : ∀ h, b[i + 3]'h = (b.extract i b.size)[3]'(by simp; omega) := by simp [ByteArray.getElem_extract]
+  have h₉ : (b.extract i b.size).size = b.size - i := by simp
+  simp only [h₁]
+  split
+  · split
+    all_goals (rename_i h h'; simp only [h₅] at h')
+    · split <;> simp_all
+    · split <;> rename_i h''
+      all_goals try (simp [h'] at h''; done)
+      simp [h₅]
+    · symm
+      split <;> rename_i h''
+      all_goals try (simp [h'] at h''; done)
+      simp [h₂, h₅, h₆]
+    · symm
+      split <;> rename_i h''
+      all_goals try (simp [h'] at h''; done)
+      simp [h₃, h₅, h₆, h₇]
+    · symm
+      split <;> rename_i h''
+      all_goals try (simp [h'] at h''; done)
+      simp [h₄, h₅, h₆, h₇, h₈]
+  · rfl
+
+public theorem ByteArray.le_size_of_utf8DecodeChar?_eq_some {b : ByteArray} {i : Nat} {c : Char}
+    (h : utf8DecodeChar? b i = some c) : i + c.utf8Size ≤ b.size := by
+  rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
+  obtain ⟨l, hl⟩ := exists_of_utf8DecodeChar?_eq_some h
+  replace hl := congrArg ByteArray.size hl
+  simp at hl
+  have hi : i < b.size := by
+    simp [utf8DecodeChar?] at h
+    obtain ⟨h, -⟩ := h
+    omega
+  omega
+
+public theorem ByteArray.lt_size_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} (h : (utf8DecodeChar? b i).isSome) :
+    i < b.size := by
+  obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
+  have := le_size_of_utf8DecodeChar?_eq_some hc
+  have := c.utf8Size_pos
+  omega
+
+public theorem ByteArray.utf8DecodeChar?_append_eq_some {b : ByteArray} {i : Nat} {c : Char} (h : utf8DecodeChar? b i = some c)
+    (b' : ByteArray) : utf8DecodeChar? (b ++ b') i = some c := by
+  have := le_size_of_utf8DecodeChar?_eq_some h
+  rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at ⊢ h
+  rw [ByteArray.extract_eq_extract_append_extract b.size (by omega) (by simp), ByteArray.extract_append_size_left,
+    eq_of_utf8DecodeChar?_eq_some h, ByteArray.append_assoc, utf8DecodeChar?_utf8EncodeChar_append]
+
+public theorem ByteArray.isSome_utf8DecodeChar?_append {b : ByteArray} {i : Nat} (h : (utf8DecodeChar? b i).isSome)
+    (b' : ByteArray) : (utf8DecodeChar? (b ++ b') i).isSome := by
+  obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
+  rw [utf8DecodeChar?_append_eq_some hc, Option.isSome_some]
+
+public def ByteArray.utf8DecodeChar (bytes : ByteArray) (i : Nat) (h : (utf8DecodeChar? bytes i).isSome) : Char :=
+  (utf8DecodeChar? bytes i).get h
+
+public theorem ByteArray.utf8DecodeChar_eq_utf8DecodeChar_extract {b : ByteArray} {i : Nat} {h} :
+    utf8DecodeChar b i h =
+      utf8DecodeChar (b.extract i b.size) 0 (by rwa [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h) := by
+  simp [utf8DecodeChar, ← utf8DecodeChar?_eq_utf8DecodeChar?_extract]
+
+theorem ByteArray.utf8DecodeChar_extract_congr_of_le {bytes : ByteArray} (i j j' : Nat) {h h'} (hj : j ≤ j') :
+    utf8DecodeChar (bytes.extract i j) 0 h = utf8DecodeChar (bytes.extract i j') 0 h' := by
+  obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
+  obtain ⟨c', hc'⟩ := Option.isSome_iff_exists.1 h'
+  have hij : i ≤ j := by
+    have := lt_size_of_isSome_utf8DecodeChar? h
+    simp only [ByteArray.size_extract] at this
+    omega
+  have h₀ := eq_of_utf8DecodeChar?_eq_some hc'
+  simp only [utf8DecodeChar, hc, Option.get_some, hc', ← Option.some_inj (a := c)]
+  have := utf8DecodeChar?_append_eq_some hc (bytes.extract j j')
+  rw [← ByteArray.extract_eq_extract_append_extract j hij hj] at this
+  rw [← this, hc']
+
+public theorem ByteArray.utf8DecodeChar_extract_congr {bytes : ByteArray} (i j j' : Nat) {h h'} :
+    utf8DecodeChar (bytes.extract i j) 0 h = utf8DecodeChar (bytes.extract i j') 0 h' := by
+  obtain (hj|hj) := Nat.le_or_le j j'
+  · exact utf8DecodeChar_extract_congr_of_le _ _ _ hj
+  · exact (utf8DecodeChar_extract_congr_of_le _ _ _ hj).symm
+
+public theorem ByteArray.utf8EncodeChar_utf8DecodeChar {b : ByteArray} {i : Nat} {h} :
+    (String.utf8EncodeChar (utf8DecodeChar b i h)).toByteArray = b.extract i (i + (utf8DecodeChar b i h).utf8Size) := by
+  rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h
+  obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h
+  rw [utf8DecodeChar_eq_utf8DecodeChar_extract, utf8DecodeChar,
+    String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some (b := b.extract i b.size) (by simp),
+    ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left]
+  exact le_size_of_utf8DecodeChar?_eq_some (by simp [← utf8DecodeChar?_eq_utf8DecodeChar?_extract])
+
+@[simp]
+public theorem List.utf8DecodeChar?_utf8Encode_singleton_append {b : ByteArray} {c : Char} :
+    ByteArray.utf8DecodeChar? ([c].utf8Encode ++ b) 0 = some c := by
+  rw [List.utf8Encode, List.flatMap_cons, List.toByteArray_append,
+    List.flatMap_nil, List.toByteArray_nil, ByteArray.append_empty,
+    ByteArray.utf8DecodeChar?_utf8EncodeChar_append]
+
+@[simp]
+public theorem List.utf8DecodeChar?_utf8Encode_singleton {c : Char} :
+    [c].utf8Encode.utf8DecodeChar? 0 = some c := by
+  simpa using utf8DecodeChar?_utf8Encode_singleton_append (b := ByteArray.empty)
+
+@[simp]
+public theorem List.utf8DecodeChar?_utf8Encode_cons {l : List Char} {c : Char} :
+    ByteArray.utf8DecodeChar? (c::l).utf8Encode 0 = some c := by
+  rw [List.utf8Encode, List.flatMap_cons, List.toByteArray_append,
+    ByteArray.utf8DecodeChar?_utf8EncodeChar_append]
+
+@[simp]
+public theorem List.utf8DecodeChar_utf8Encode_cons {l : List Char} {c : Char} {h} :
+    ByteArray.utf8DecodeChar (c::l).utf8Encode 0 h = c := by
+  simp [ByteArray.utf8DecodeChar]
diff --git a/src/Init/Data/String/Extra.lean b/src/Init/Data/String/Extra.lean
index 15c06af6c0..b96b36fb23 100644
--- a/src/Init/Data/String/Extra.lean
+++ b/src/Init/Data/String/Extra.lean
@@ -133,92 +133,15 @@ the corresponding string, or panics if the array is not a valid UTF-8 encoding o
 @[inline] def fromUTF8! (a : ByteArray) : String :=
   if h : validateUTF8 a then fromUTF8 a h else panic! "invalid UTF-8 string"
 
-/--
-Returns the sequence of bytes in a character's UTF-8 encoding.
--/
-def utf8EncodeCharFast (c : Char) : List UInt8 :=
-  let v := c.val
-  if v ≤ 0x7f then
-    [v.toUInt8]
-  else if v ≤ 0x7ff then
-    [(v >>>  6).toUInt8 &&& 0x1f ||| 0xc0,
-              v.toUInt8 &&& 0x3f ||| 0x80]
-  else if v ≤ 0xffff then
-    [(v >>> 12).toUInt8 &&& 0x0f ||| 0xe0,
-     (v >>>  6).toUInt8 &&& 0x3f ||| 0x80,
-              v.toUInt8 &&& 0x3f ||| 0x80]
-  else
-    [(v >>> 18).toUInt8 &&& 0x07 ||| 0xf0,
-     (v >>> 12).toUInt8 &&& 0x3f ||| 0x80,
-     (v >>>  6).toUInt8 &&& 0x3f ||| 0x80,
-              v.toUInt8 &&& 0x3f ||| 0x80]
-
-private theorem Nat.add_two_pow_eq_or_of_lt {b : Nat} (i : Nat) (b_lt : b < 2 ^ i) (a : Nat) :
-    b + 2 ^ i * a = b ||| 2 ^ i * a := by
-  rw [Nat.add_comm, Nat.or_comm, Nat.two_pow_add_eq_or_of_lt b_lt]
-
-@[csimp]
-theorem utf8EncodeChar_eq_utf8EncodeCharFast : @utf8EncodeChar = @utf8EncodeCharFast := by
-  funext c
-  simp only [utf8EncodeChar, utf8EncodeCharFast, UInt8.ofNat_uInt32ToNat, UInt8.ofNat_add,
-    UInt8.reduceOfNat, UInt32.le_iff_toNat_le, UInt32.reduceToNat]
-  split
-  · rfl
-  · split
-    · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
-        Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
-        UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
-      refine ⟨?_, ?_⟩
-      · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 5 (by omega) 6,
-          Nat.and_two_pow_sub_one_eq_mod _ 5, Nat.shiftRight_eq_div_pow,
-          Nat.mod_eq_of_lt (b := 256) (by omega)]
-      · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-          Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd _ (by decide)]
-    · split
-      · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
-          Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
-          UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
-        refine ⟨?_, ?_, ?_⟩
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 4 (by omega) 14,
-            Nat.and_two_pow_sub_one_eq_mod _ 4, Nat.shiftRight_eq_div_pow,
-            Nat.mod_eq_of_lt (b := 256) (by omega)]
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
-            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
-      · simp only [List.cons.injEq, ← UInt8.toNat_inj, UInt8.toNat_add, UInt8.toNat_ofNat',
-          Nat.reducePow, UInt8.reduceToNat, Nat.mod_add_mod, UInt8.toNat_or, UInt8.toNat_and,
-          UInt32.toNat_toUInt8, UInt32.toNat_shiftRight, UInt32.reduceToNat, Nat.reduceMod, and_true]
-        refine ⟨?_, ?_, ?_, ?_⟩
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 3 (by omega) 30,
-            Nat.and_two_pow_sub_one_eq_mod _ 3, Nat.shiftRight_eq_div_pow,
-            Nat.mod_mod_of_dvd _ (by decide)]
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
-            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 12) (by decide)]
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.shiftRight_eq_div_pow,
-            Nat.mod_mod_of_dvd (c.val.toNat / 2 ^ 6) (by decide)]
-        · rw [Nat.mod_eq_of_lt (by omega), Nat.add_two_pow_eq_or_of_lt 6 (by omega) 2,
-            Nat.and_two_pow_sub_one_eq_mod _ 6, Nat.mod_mod_of_dvd c.val.toNat (by decide)]
-
-@[simp] theorem length_utf8EncodeChar (c : Char) : (utf8EncodeChar c).length = c.utf8Size := by
-  simp [Char.utf8Size, utf8EncodeChar_eq_utf8EncodeCharFast, utf8EncodeCharFast]
-  cases Decidable.em (c.val ≤ 0x7f) <;> simp [*]
-  cases Decidable.em (c.val ≤ 0x7ff) <;> simp [*]
-  cases Decidable.em (c.val ≤ 0xffff) <;> simp [*]
-
 /--
 Encodes a string in UTF-8 as an array of bytes.
 -/
 @[extern "lean_string_to_utf8"]
 def toUTF8 (a : @& String) : ByteArray :=
-  ⟨⟨a.data.flatMap utf8EncodeChar⟩⟩
+  a.bytes
 
 @[simp] theorem size_toUTF8 (s : String) : s.toUTF8.size = s.utf8ByteSize := by
-  simp [toUTF8, ByteArray.size, Array.size, utf8ByteSize, List.flatMap]
-  induction s.data <;> simp [List.map, utf8ByteSize.go, Nat.add_comm, *]
+  rfl
 
 /--
 Accesses the indicated byte in the UTF-8 encoding of a string.
diff --git a/src/Init/Data/UInt/Bitwise.lean b/src/Init/Data/UInt/Bitwise.lean
index a1116fe27d..31853efe5f 100644
--- a/src/Init/Data/UInt/Bitwise.lean
+++ b/src/Init/Data/UInt/Bitwise.lean
@@ -1327,3 +1327,14 @@ theorem UInt64.right_le_or {a b : UInt64} : b ≤ a ||| b := by
   simpa [UInt64.le_iff_toNat_le] using Nat.right_le_or
 theorem USize.right_le_or {a b : USize} : b ≤ a ||| b := by
   simpa [USize.le_iff_toNat_le] using Nat.right_le_or
+
+theorem UInt8.and_lt_add_one {b c : UInt8} (h : c ≠ -1) : b &&& c < c + 1 :=
+  UInt8.lt_of_le_of_lt UInt8.and_le_right (UInt8.lt_add_one h)
+theorem UInt16.and_lt_add_one {b c : UInt16} (h : c ≠ -1) : b &&& c < c + 1 :=
+  UInt16.lt_of_le_of_lt UInt16.and_le_right (UInt16.lt_add_one h)
+theorem UInt32.and_lt_add_one {b c : UInt32} (h : c ≠ -1) : b &&& c < c + 1 :=
+  UInt32.lt_of_le_of_lt UInt32.and_le_right (UInt32.lt_add_one h)
+theorem UInt64.and_lt_add_one {b c : UInt64} (h : c ≠ -1) : b &&& c < c + 1 :=
+  UInt64.lt_of_le_of_lt UInt64.and_le_right (UInt64.lt_add_one h)
+theorem USize.and_lt_add_one {b c : USize} (h : c ≠ -1) : b &&& c < c + 1 :=
+  USize.lt_of_le_of_lt USize.and_le_right (USize.lt_add_one h)
diff --git a/src/Init/Data/UInt/Lemmas.lean b/src/Init/Data/UInt/Lemmas.lean
index 7f58667f95..da178dc85d 100644
--- a/src/Init/Data/UInt/Lemmas.lean
+++ b/src/Init/Data/UInt/Lemmas.lean
@@ -3163,3 +3163,15 @@ protected theorem USize.sub_lt {a b : USize} (hb : 0 < b) (hab : b ≤ a) : a -
   rw [lt_iff_toNat_lt, USize.toNat_sub_of_le _ _ hab]
   refine Nat.sub_lt ?_ (USize.lt_iff_toNat_lt.1 hb)
   exact USize.lt_iff_toNat_lt.1 (USize.lt_of_lt_of_le hb hab)
+
+theorem UInt8.lt_add_one {c : UInt8} (h : c ≠ -1) : c < c + 1 :=
+  UInt8.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt8.toBitVec_inj] using h))
+theorem UInt16.lt_add_one {c : UInt16} (h : c ≠ -1) : c < c + 1 :=
+  UInt16.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt16.toBitVec_inj] using h))
+theorem UInt32.lt_add_one {c : UInt32} (h : c ≠ -1) : c < c + 1 :=
+  UInt32.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt32.toBitVec_inj] using h))
+theorem UInt64.lt_add_one {c : UInt64} (h : c ≠ -1) : c < c + 1 :=
+  UInt64.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one (by simpa [← UInt64.toBitVec_inj] using h))
+theorem USize.lt_add_one {c : USize} (h : c ≠ -1) : c < c + 1 :=
+  USize.lt_iff_toBitVec_lt.2 (BitVec.lt_add_one
+    (by simpa [← USize.toBitVec_inj, BitVec.neg_one_eq_allOnes] using h))
diff --git a/src/Init/Prelude.lean b/src/Init/Prelude.lean
index c0f4cf7ab3..f3e9bda653 100644
--- a/src/Init/Prelude.lean
+++ b/src/Init/Prelude.lean
@@ -3010,218 +3010,56 @@ def List.concat {α : Type u} : List α → α → List α
   | cons a as, b => cons a (concat as b)
 
 /--
-Returns the sequence of bytes in a character's UTF-8 encoding.
+Appends two lists. Normally used via the `++` operator.
+
+Appending lists takes time proportional to the length of the first list: `O(|xs|)`.
+
+Examples:
+  * `[1, 2, 3] ++ [4, 5] = [1, 2, 3, 4, 5]`.
+  * `[] ++ [4, 5] = [4, 5]`.
+  * `[1, 2, 3] ++ [] = [1, 2, 3]`.
 -/
-def String.utf8EncodeChar (c : Char) : List UInt8 :=
-  let v := c.val.toNat
-  ite (LE.le v 0x7f)
-    (List.cons (UInt8.ofNat v) List.nil)
-    (ite (LE.le v 0x7ff)
-      (List.cons
-        (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x20) 0xc0))
-        (List.cons
-          (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
-          List.nil))
-      (ite (LE.le v 0xffff)
-        (List.cons
-          (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x10) 0xe0))
-          (List.cons
-            (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
-            (List.cons
-              (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
-              List.nil)))
-        (List.cons
-          (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 262144) 0x08) 0xf0))
-          (List.cons
-            (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x40) 0x80))
-            (List.cons
-              (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
-              (List.cons
-                (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
-                List.nil))))))
+protected def List.append : (xs ys : List α) → List α
+  | nil,       bs => bs
+  | cons a as, bs => cons a (List.append as bs)
 
 /--
-A string is a sequence of Unicode code points.
+Concatenates a list of lists into a single list, preserving the order of the elements.
 
-At runtime, strings are represented by [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array)
-of bytes using the UTF-8 encoding. Both the size in bytes (`String.utf8ByteSize`) and in characters
-(`String.length`) are cached and take constant time. Many operations on strings perform in-place
-modifications when the reference to the string is unique.
+`O(|flatten L|)`.
+
+Examples:
+* `[["a"], ["b", "c"]].flatten = ["a", "b", "c"]`
+* `[["a"], [], ["b", "c"], ["d", "e", "f"]].flatten = ["a", "b", "c", "d", "e", "f"]`
 -/
-structure String where
-  /-- Pack a `List Char` into a `String`. This function is overridden by the
-  compiler and is O(n) in the length of the list. -/
-  mk ::
-  /-- Unpack `String` into a `List Char`. This function is overridden by the
-  compiler and is O(n) in the length of the list. -/
-  data : List Char
-
-attribute [extern "lean_string_mk"] String.mk
-attribute [extern "lean_string_data"] String.data
+def List.flatten : List (List α) → List α
+  | nil      => nil
+  | cons l L => List.append l (flatten L)
 
 /--
-Decides whether two strings are equal. Normally used via the `DecidableEq String` instance and the
-`=` operator.
+Applies a function to each element of the list, returning the resulting list of values.
 
-At runtime, this function is overridden with an efficient native implementation.
+`O(|l|)`.
+
+Examples:
+* `[a, b, c].map f = [f a, f b, f c]`
+* `[].map Nat.succ = []`
+* `["one", "two", "three"].map (·.length) = [3, 3, 5]`
+* `["one", "two", "three"].map (·.reverse) = ["eno", "owt", "eerht"]`
 -/
-@[extern "lean_string_dec_eq"]
-def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
-  match s₁, s₂ with
-  | ⟨s₁⟩, ⟨s₂⟩ =>
-    dite (Eq s₁ s₂) (fun h => isTrue (congrArg _ h)) (fun h => isFalse (fun h' => String.noConfusion h' (fun h' => absurd h' h)))
-
-instance : DecidableEq String := String.decEq
+@[specialize] def List.map (f : α → β) : (l : List α) → List β
+  | nil       => nil
+  | cons a as => cons (f a) (map f as)
 
 /--
-A byte position in a `String`, according to its UTF-8 encoding.
+Applies a function that returns a list to each element of a list, and concatenates the resulting
+lists.
 
-Character positions (counting the Unicode code points rather than bytes) are represented by plain
-`Nat`s. Indexing a `String` by a `String.Pos` takes constant time, while character positions need to
-be translated internally to byte positions, which takes linear time.
-
-A byte position `p` is *valid* for a string `s` if `0 ≤ p ≤ s.endPos` and `p` lies on a UTF-8
-character boundary.
+Examples:
+* `[2, 3, 2].flatMap List.range = [0, 1, 0, 1, 2, 0, 1]`
+* `["red", "blue"].flatMap String.toList = ['r', 'e', 'd', 'b', 'l', 'u', 'e']`
 -/
-structure String.Pos where
-  /-- Get the underlying byte index of a `String.Pos` -/
-  byteIdx : Nat := 0
-
-instance : Inhabited String.Pos where
-  default := {}
-
-instance : DecidableEq String.Pos :=
-  fun ⟨a⟩ ⟨b⟩ => match decEq a b with
-    | isTrue h => isTrue (h ▸ rfl)
-    | isFalse h => isFalse (fun he => String.Pos.noConfusion he fun he => absurd he h)
-
-/--
-A region or slice of some underlying string.
-
-A substring contains an string together with the start and end byte positions of a region of
-interest. Actually extracting a substring requires copying and memory allocation, while many
-substrings of the same underlying string may exist with very little overhead, and they are more
-convenient than tracking the bounds by hand.
-
-Using its constructor explicitly, it is possible to construct a `Substring` in which one or both of
-the positions is invalid for the string. Many operations will return unexpected or confusing results
-if the start and stop positions are not valid. Instead, it's better to use API functions that ensure
-the validity of the positions in a substring to create and manipulate them.
--/
-structure Substring where
-  /-- The underlying string. -/
-  str      : String
-  /-- The byte position of the start of the string slice. -/
-  startPos : String.Pos
-  /-- The byte position of the end of the string slice. -/
-  stopPos  : String.Pos
-
-instance : Inhabited Substring where
-  default := ⟨"", {}, {}⟩
-
-/--
-The number of bytes used by the string's UTF-8 encoding.
--/
-@[inline, expose] def Substring.bsize : Substring → Nat
-  | ⟨_, b, e⟩ => e.byteIdx.sub b.byteIdx
-
-/--
-The number of bytes used by the string's UTF-8 encoding.
-
-At runtime, this function takes constant time because the byte length of strings is cached.
--/
-@[extern "lean_string_utf8_byte_size"]
-def String.utf8ByteSize : (@& String) → Nat
-  | ⟨s⟩ => go s
-where
-  go : List Char → Nat
-   | .nil       => 0
-   | .cons c cs => hAdd (go cs) c.utf8Size
-
-/--
-A UTF-8 byte position that points at the end of a string, just after the last character.
-
-* `"abc".endPos = ⟨3⟩`
-* `"L∃∀N".endPos = ⟨8⟩`
--/
-@[inline] def String.endPos (s : String) : String.Pos where
-  byteIdx := utf8ByteSize s
-
-/--
-Converts a `String` into a `Substring` that denotes the entire string.
--/
-@[inline] def String.toSubstring (s : String) : Substring where
-  str      := s
-  startPos := {}
-  stopPos  := s.endPos
-
-/--
-Converts a `String` into a `Substring` that denotes the entire string.
-
-This is a version of `String.toSubstring` that doesn't have an `@[inline]` annotation.
--/
-def String.toSubstring' (s : String) : Substring :=
-  s.toSubstring
-
-/--
-This function will cast a value of type `α` to type `β`, and is a no-op in the
-compiler. This function is **extremely dangerous** because there is no guarantee
-that types `α` and `β` have the same data representation, and this can lead to
-memory unsafety. It is also logically unsound, since you could just cast
-`True` to `False`. For all those reasons this function is marked as `unsafe`.
-
-It is implemented by lifting both `α` and `β` into a common universe, and then
-using `cast (lcProof : ULift (PLift α) = ULift (PLift β))` to actually perform
-the cast. All these operations are no-ops in the compiler.
-
-Using this function correctly requires some knowledge of the data representation
-of the source and target types. Some general classes of casts which are safe in
-the current runtime:
-
-* `Array α` to `Array β` where `α` and `β` have compatible representations,
-  or more generally for other inductive types.
-* `Quot α r` and `α`.
-* `@Subtype α p` and `α`, or generally any structure containing only one
-  non-`Prop` field of type `α`.
-* Casting `α` to/from `NonScalar` when `α` is a boxed generic type
-  (i.e. a function that accepts an arbitrary type `α` and is not specialized to
-  a scalar type like `UInt8`).
--/
-unsafe def unsafeCast {α : Sort u} {β : Sort v} (a : α) : β :=
-  PLift.down (ULift.down.{max u v} (cast lcProof (ULift.up.{max u v} (PLift.up a))))
-
-
-/-- Auxiliary definition for `panic`. -/
-/-
-This is a workaround for `panic` occurring in monadic code. See issue #695.
-The `panicCore` definition cannot be specialized since it is an extern.
-When `panic` occurs in monadic code, the `Inhabited α` parameter depends on a
-`[inst : Monad m]` instance. The `inst` parameter will not be eliminated during
-specialization if it occurs inside of a binder (to avoid work duplication), and
-will prevent the actual monad from being "copied" to the code being specialized.
-When we reimplement the specializer, we may consider copying `inst` if it also
-occurs outside binders or if it is an instance.
--/
-@[never_extract, extern "lean_panic_fn"]
-def panicCore {α : Sort u} [Inhabited α] (msg : String) : α := default
-
-/--
-`(panic "msg" : α)` has a built-in implementation which prints `msg` to
-the error buffer. It *does not* terminate execution, and because it is a safe
-function, it still has to return an element of `α`, so it takes `[Inhabited α]`
-and returns `default`. It is primarily intended for debugging in pure contexts,
-and assertion failures.
-
-Because this is a pure function with side effects, it is marked as
-`@[never_extract]` so that the compiler will not perform common sub-expression
-elimination and other optimizations that assume that the expression is pure.
--/
-@[noinline, never_extract]
-def panic {α : Sort u} [Inhabited α] (msg : String) : α :=
-  panicCore msg
-
--- TODO: this be applied directly to `Inhabited`'s definition when we remove the above workaround
-attribute [nospecialize] Inhabited
+@[inline] def List.flatMap {α : Type u} {β : Type v} (b : α → List β) (as : List α) : List β := flatten (map b as)
 
 /--
 `Array α` is the type of [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array) with elements
@@ -3452,6 +3290,276 @@ def Array.extract (as : Array α) (start : Nat := 0) (stop : Nat := as.size) : A
   let sz' := Nat.sub (min stop as.size) start
   loop sz' start (emptyWithCapacity sz')
 
+/-- `ByteArray` is like `Array UInt8`, but with an efficient run-time representation as a packed
+byte buffer. -/
+structure ByteArray where
+  /-- The data contained in the byte array. -/
+  data : Array UInt8
+
+attribute [extern "lean_byte_array_mk"] ByteArray.mk
+attribute [extern "lean_byte_array_data"] ByteArray.data
+
+/--
+Constructs a new empty byte array with initial capacity `c`.
+-/
+@[extern "lean_mk_empty_byte_array"]
+def ByteArray.emptyWithCapacity (c : @& Nat) : ByteArray :=
+  { data := Array.empty }
+
+/--
+Constructs a new empty byte array with initial capacity `0`.
+
+Use `ByteArray.emptyWithCapacity` to create an array with a greater initial capacity.
+-/
+def ByteArray.empty : ByteArray := emptyWithCapacity 0
+
+/--
+Adds an element to the end of an array. The resulting array's size is one greater than the input
+array. If there are no other references to the array, then it is modified in-place.
+
+This takes amortized `O(1)` time because `Array α` is represented by a dynamic array.
+-/
+@[extern "lean_byte_array_push"]
+def ByteArray.push : ByteArray → UInt8 → ByteArray
+  | ⟨bs⟩, b => ⟨bs.push b⟩
+
+/--
+Converts a list of bytes into a `ByteArray`.
+-/
+def List.toByteArray (bs : List UInt8) : ByteArray :=
+  let rec loop
+    | nil,        r => r
+    | cons b bs,  r => loop bs (r.push b)
+  loop bs ByteArray.empty
+
+/-- Returns the size of the byte array. -/
+@[extern "lean_byte_array_size"]
+def ByteArray.size : (@& ByteArray) → Nat
+  | ⟨bs⟩ => bs.size
+
+/--
+Returns the sequence of bytes in a character's UTF-8 encoding.
+-/
+def String.utf8EncodeChar (c : Char) : List UInt8 :=
+  let v := c.val.toNat
+  ite (LE.le v 0x7f)
+    (List.cons (UInt8.ofNat v) List.nil)
+    (ite (LE.le v 0x7ff)
+      (List.cons
+        (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x20) 0xc0))
+        (List.cons
+          (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
+          List.nil))
+      (ite (LE.le v 0xffff)
+        (List.cons
+          (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x10) 0xe0))
+          (List.cons
+            (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
+            (List.cons
+              (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
+              List.nil)))
+        (List.cons
+          (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 262144) 0x08) 0xf0))
+          (List.cons
+            (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 4096) 0x40) 0x80))
+            (List.cons
+              (UInt8.ofNat (HAdd.hAdd (HMod.hMod (HDiv.hDiv v 64) 0x40) 0x80))
+              (List.cons
+                (UInt8.ofNat (HAdd.hAdd (HMod.hMod v 0x40) 0x80))
+                List.nil))))))
+
+/-- Encode a list of characters (Unicode scalar value) in UTF-8. This is an inefficient model
+implementation. Use `List.asString` instead. -/
+def List.utf8Encode (l : List Char) : ByteArray :=
+  l.flatMap String.utf8EncodeChar |>.toByteArray
+
+/-- A byte array is valid UTF-8 if it is of the form `List.Internal.utf8Encode m` for some `m`.
+
+Note that in order for this definition to be well-behaved it is necessary to know that this `m`
+is unique. To show this, one defines UTF-8 decoding and shows that encoding and decoding are
+mutually inverse. -/
+inductive ByteArray.IsValidUtf8 (b : ByteArray) : Prop
+  /-- Show that a byte -/
+  | intro (m : List Char) (hm : Eq b (List.utf8Encode m))
+
+/--
+A string is a sequence of Unicode scalar values.
+
+At runtime, strings are represented by [dynamic arrays](https://en.wikipedia.org/wiki/Dynamic_array)
+of bytes using the UTF-8 encoding. Both the size in bytes (`String.utf8ByteSize`) and in characters
+(`String.length`) are cached and take constant time. Many operations on strings perform in-place
+modifications when the reference to the string is unique.
+-/
+structure String where ofByteArray ::
+  /-- The bytes of the UTF-8 encoding of the string. -/
+  bytes : ByteArray
+  /-- The bytes of the string form valid UTF-8. -/
+  isValidUtf8 : ByteArray.IsValidUtf8 bytes
+
+attribute [extern "lean_string_to_utf8"] String.bytes
+
+/--
+Decides whether two strings are equal. Normally used via the `DecidableEq String` instance and the
+`=` operator.
+
+At runtime, this function is overridden with an efficient native implementation.
+-/
+@[extern "lean_string_dec_eq"]
+def String.decEq (s₁ s₂ : @& String) : Decidable (Eq s₁ s₂) :=
+  match s₁, s₂ with
+  | ⟨⟨⟨s₁⟩⟩, _⟩, ⟨⟨⟨s₂⟩⟩, _⟩ =>
+    dite (Eq s₁ s₂) (fun h => match s₁, s₂, h with | _, _, Eq.refl _ => isTrue rfl)
+      (fun h => isFalse
+        (fun h' => h (congrArg (fun s => Array.toList (ByteArray.data (String.bytes s))) h')))
+
+instance : DecidableEq String := String.decEq
+
+/--
+A byte position in a `String`, according to its UTF-8 encoding.
+
+Character positions (counting the Unicode code points rather than bytes) are represented by plain
+`Nat`s. Indexing a `String` by a `String.Pos` takes constant time, while character positions need to
+be translated internally to byte positions, which takes linear time.
+
+A byte position `p` is *valid* for a string `s` if `0 ≤ p ≤ s.endPos` and `p` lies on a UTF-8
+character boundary.
+-/
+structure String.Pos where
+  /-- Get the underlying byte index of a `String.Pos` -/
+  byteIdx : Nat := 0
+
+instance : Inhabited String.Pos where
+  default := {}
+
+instance : DecidableEq String.Pos :=
+  fun ⟨a⟩ ⟨b⟩ => match decEq a b with
+    | isTrue h => isTrue (h ▸ rfl)
+    | isFalse h => isFalse (fun he => String.Pos.noConfusion he fun he => absurd he h)
+
+/--
+A region or slice of some underlying string.
+
+A substring contains an string together with the start and end byte positions of a region of
+interest. Actually extracting a substring requires copying and memory allocation, while many
+substrings of the same underlying string may exist with very little overhead, and they are more
+convenient than tracking the bounds by hand.
+
+Using its constructor explicitly, it is possible to construct a `Substring` in which one or both of
+the positions is invalid for the string. Many operations will return unexpected or confusing results
+if the start and stop positions are not valid. Instead, it's better to use API functions that ensure
+the validity of the positions in a substring to create and manipulate them.
+-/
+structure Substring where
+  /-- The underlying string. -/
+  str      : String
+  /-- The byte position of the start of the string slice. -/
+  startPos : String.Pos
+  /-- The byte position of the end of the string slice. -/
+  stopPos  : String.Pos
+
+instance : Inhabited Substring where
+  default := ⟨"", {}, {}⟩
+
+/--
+The number of bytes used by the string's UTF-8 encoding.
+-/
+@[inline, expose] def Substring.bsize : Substring → Nat
+  | ⟨_, b, e⟩ => e.byteIdx.sub b.byteIdx
+
+/--
+The number of bytes used by the string's UTF-8 encoding.
+
+At runtime, this function takes constant time because the byte length of strings is cached.
+-/
+@[extern "lean_string_utf8_byte_size"]
+def String.utf8ByteSize (s : @& String) : Nat :=
+  s.bytes.size
+
+/--
+A UTF-8 byte position that points at the end of a string, just after the last character.
+
+* `"abc".endPos = ⟨3⟩`
+* `"L∃∀N".endPos = ⟨8⟩`
+-/
+@[inline] def String.endPos (s : String) : String.Pos where
+  byteIdx := utf8ByteSize s
+
+/--
+Converts a `String` into a `Substring` that denotes the entire string.
+-/
+@[inline] def String.toSubstring (s : String) : Substring where
+  str      := s
+  startPos := {}
+  stopPos  := s.endPos
+
+/--
+Converts a `String` into a `Substring` that denotes the entire string.
+
+This is a version of `String.toSubstring` that doesn't have an `@[inline]` annotation.
+-/
+def String.toSubstring' (s : String) : Substring :=
+  s.toSubstring
+
+/--
+This function will cast a value of type `α` to type `β`, and is a no-op in the
+compiler. This function is **extremely dangerous** because there is no guarantee
+that types `α` and `β` have the same data representation, and this can lead to
+memory unsafety. It is also logically unsound, since you could just cast
+`True` to `False`. For all those reasons this function is marked as `unsafe`.
+
+It is implemented by lifting both `α` and `β` into a common universe, and then
+using `cast (lcProof : ULift (PLift α) = ULift (PLift β))` to actually perform
+the cast. All these operations are no-ops in the compiler.
+
+Using this function correctly requires some knowledge of the data representation
+of the source and target types. Some general classes of casts which are safe in
+the current runtime:
+
+* `Array α` to `Array β` where `α` and `β` have compatible representations,
+  or more generally for other inductive types.
+* `Quot α r` and `α`.
+* `@Subtype α p` and `α`, or generally any structure containing only one
+  non-`Prop` field of type `α`.
+* Casting `α` to/from `NonScalar` when `α` is a boxed generic type
+  (i.e. a function that accepts an arbitrary type `α` and is not specialized to
+  a scalar type like `UInt8`).
+-/
+unsafe def unsafeCast {α : Sort u} {β : Sort v} (a : α) : β :=
+  PLift.down (ULift.down.{max u v} (cast lcProof (ULift.up.{max u v} (PLift.up a))))
+
+
+/-- Auxiliary definition for `panic`. -/
+/-
+This is a workaround for `panic` occurring in monadic code. See issue #695.
+The `panicCore` definition cannot be specialized since it is an extern.
+When `panic` occurs in monadic code, the `Inhabited α` parameter depends on a
+`[inst : Monad m]` instance. The `inst` parameter will not be eliminated during
+specialization if it occurs inside of a binder (to avoid work duplication), and
+will prevent the actual monad from being "copied" to the code being specialized.
+When we reimplement the specializer, we may consider copying `inst` if it also
+occurs outside binders or if it is an instance.
+-/
+@[never_extract, extern "lean_panic_fn"]
+def panicCore {α : Sort u} [Inhabited α] (msg : String) : α := default
+
+/--
+`(panic "msg" : α)` has a built-in implementation which prints `msg` to
+the error buffer. It *does not* terminate execution, and because it is a safe
+function, it still has to return an element of `α`, so it takes `[Inhabited α]`
+and returns `default`. It is primarily intended for debugging in pure contexts,
+and assertion failures.
+
+Because this is a pure function with side effects, it is marked as
+`@[never_extract]` so that the compiler will not perform common sub-expression
+elimination and other optimizations that assume that the expression is pure.
+-/
+@[noinline, never_extract]
+def panic {α : Sort u} [Inhabited α] (msg : String) : α :=
+  panicCore msg
+
+-- TODO: this be applied directly to `Inhabited`'s definition when we remove the above workaround
+attribute [nospecialize] Inhabited
+
 /--
 The `>>=` operator is overloaded via instances of `bind`.
 
diff --git a/src/Init/SizeOf.lean b/src/Init/SizeOf.lean
index ee5c00e984..ec8446e12d 100644
--- a/src/Init/SizeOf.lean
+++ b/src/Init/SizeOf.lean
@@ -84,10 +84,11 @@ deriving instance SizeOf for USize
 deriving instance SizeOf for Char
 deriving instance SizeOf for Option
 deriving instance SizeOf for List
+deriving instance SizeOf for Array
+deriving instance SizeOf for ByteArray
 deriving instance SizeOf for String
 deriving instance SizeOf for String.Pos
 deriving instance SizeOf for Substring
-deriving instance SizeOf for Array
 deriving instance SizeOf for Except
 deriving instance SizeOf for EStateM.Result
 
diff --git a/src/Lean/DocString/Parser.lean b/src/Lean/DocString/Parser.lean
index 2e79782977..2ae30749c1 100644
--- a/src/Lean/DocString/Parser.lean
+++ b/src/Lean/DocString/Parser.lean
@@ -611,7 +611,7 @@ mutual
           asStringFn (atomicFn (noSpaceBefore >> repFn count (satisfyFn (· == char) s!"'{tok count}'"))))
 
   where
-    tok (count : Nat) : String := ⟨List.replicate count char⟩
+    tok (count : Nat) : String := (List.replicate count char).asString
     opener (ctxt : InlineCtxt) : ParserFn :=
       match getter ctxt with
       | none => many1Fn (satisfyFn (· == char) s!"any number of {char}s")
diff --git a/tests/lean/interactive/hover.lean.expected.out b/tests/lean/interactive/hover.lean.expected.out
index c23b82bd6e..f4b9ca9fef 100644
--- a/tests/lean/interactive/hover.lean.expected.out
+++ b/tests/lean/interactive/hover.lean.expected.out
@@ -631,7 +631,7 @@
   "end": {"line": 294, "character": 20}},
  "contents":
  {"value":
-  "```lean\nList.map.{u, v} {α : Type u} {β : Type v} (f : α → β) (l : List α) : List β\n```\n***\nApplies a function to each element of the list, returning the resulting list of values.\n\n`O(|l|)`.\n\nExamples:\n* `[a, b, c].map f = [f a, f b, f c]`\n* `[].map Nat.succ = []`\n* `[\"one\", \"two\", \"three\"].map (·.length) = [3, 3, 5]`\n* `[\"one\", \"two\", \"three\"].map (·.reverse) = [\"eno\", \"owt\", \"eerht\"]`\n\n***\n*import Init.Data.List.Basic*",
+  "```lean\nList.map.{u_1, u_2} {α : Type u_1} {β : Type u_2} (f : α → β) (l : List α) : List β\n```\n***\nApplies a function to each element of the list, returning the resulting list of values.\n\n`O(|l|)`.\n\nExamples:\n* `[a, b, c].map f = [f a, f b, f c]`\n* `[].map Nat.succ = []`\n* `[\"one\", \"two\", \"three\"].map (·.length) = [3, 3, 5]`\n* `[\"one\", \"two\", \"three\"].map (·.reverse) = [\"eno\", \"owt\", \"eerht\"]`\n\n***\n*import Init.Prelude*",
   "kind": "markdown"}}
 {"textDocument": {"uri": "file:///hover.lean"},
  "position": {"line": 297, "character": 26}}
diff --git a/tests/lean/run/2846.lean b/tests/lean/run/2846.lean
index ae9e389c23..9c345b6a67 100644
--- a/tests/lean/run/2846.lean
+++ b/tests/lean/run/2846.lean
@@ -5,8 +5,8 @@
 /-!
 Defined without named arguments, prints without named arguments.
 -/
-/-- info: String.append : String → String → String -/
-#guard_msgs in #check String.append
+/-- info: Nat.add : Nat → Nat → Nat -/
+#guard_msgs in #check Nat.add
 
 /-!
 The List argument is not named, it is not printed as a named argument.
diff --git a/tests/lean/run/arthur1.lean b/tests/lean/run/arthur1.lean
index 16e38b0a7a..5bf10b6134 100644
--- a/tests/lean/run/arthur1.lean
+++ b/tests/lean/run/arthur1.lean
@@ -85,7 +85,7 @@ def removeRightmostZeros (s : String) : String :=
       if a != '0'
         then aux [] (a :: (buff ++ res)) as
         else aux (a :: buff) res as
-  ⟨aux [] [] s.data⟩
+  (aux [] [] s.data).asString
 
 protected def Literal.toString : Literal → String
   | bool  b => toString b
diff --git a/tests/lean/run/arthur2.lean b/tests/lean/run/arthur2.lean
index 3cb7873927..9797d1926e 100644
--- a/tests/lean/run/arthur2.lean
+++ b/tests/lean/run/arthur2.lean
@@ -85,7 +85,7 @@ def removeRightmostZeros (s : String) : String :=
       if a != '0'
         then aux [] (a :: (buff ++ res)) as
         else aux (a :: buff) res as
-  ⟨aux [] [] s.data⟩
+  (aux [] [] s.data).asString
 
 protected def Literal.toString : Literal → String
   | bool  b => toString b
diff --git a/tests/lean/run/fun_cases.lean b/tests/lean/run/fun_cases.lean
index 1de0946b18..043875013c 100644
--- a/tests/lean/run/fun_cases.lean
+++ b/tests/lean/run/fun_cases.lean
@@ -11,8 +11,8 @@ example (x : Option Nat) (f : Nat → Nat) : (x.map f).isSome = x.isSome := by
   case case2 =>   simp
 
 /--
-info: List.map.fun_cases.{u} {α : Type u} (motive : List α → Prop) (case1 : motive [])
-  (case2 : ∀ (a : α) (as : List α), motive (a :: as)) (x✝ : List α) : motive x✝
+info: List.map.fun_cases.{u_1} {α : Type u_1} (motive : List α → Prop) (case1 : motive [])
+  (case2 : ∀ (head : α) (as : List α), motive (head :: as)) (x✝ : List α) : motive x✝
 -/
 #guard_msgs in
 #check List.map.fun_cases
diff --git a/tests/lean/run/lex.lean b/tests/lean/run/lex.lean
index b0d6cbf208..e036c79669 100644
--- a/tests/lean/run/lex.lean
+++ b/tests/lean/run/lex.lean
@@ -78,7 +78,7 @@ namespace NonMutual
 
 def lex [Monad m] [MonadExceptOf LexErr m] (current? : Option (List Char × Nat)) (it : String.Iterator) : m (List Token) := do
   let currTok := fun
-    | (cs, n) => { text := {data := cs.reverse}, tok := Tok.num n }
+    | (cs, n) => { text := cs.reverse.asString , tok := Tok.num n }
   if it.atEnd then
     return current?.toList.map currTok
   else
diff --git a/tests/lean/run/printEqns.lean b/tests/lean/run/printEqns.lean
index 6be213f783..68d2dd3a03 100644
--- a/tests/lean/run/printEqns.lean
+++ b/tests/lean/run/printEqns.lean
@@ -1,17 +1,17 @@
 /--
 info: equations:
-@[defeq] theorem List.append.eq_1.{u} : ∀ {α : Type u} (x : List α), [].append x = x
-@[defeq] theorem List.append.eq_2.{u} : ∀ {α : Type u} (x : List α) (a : α) (l : List α),
-  (a :: l).append x = a :: l.append x
+@[defeq] theorem List.append.eq_1.{u_1} : ∀ {α : Type u_1} (x : List α), [].append x = x
+@[defeq] theorem List.append.eq_2.{u_1} : ∀ {α : Type u_1} (x : List α) (a : α) (as : List α),
+  (a :: as).append x = a :: as.append x
 -/
 #guard_msgs in
 #print eqns List.append
 
 /--
 info: equations:
-@[defeq] theorem List.append.eq_1.{u} : ∀ {α : Type u} (x : List α), [].append x = x
-@[defeq] theorem List.append.eq_2.{u} : ∀ {α : Type u} (x : List α) (a : α) (l : List α),
-  (a :: l).append x = a :: l.append x
+@[defeq] theorem List.append.eq_1.{u_1} : ∀ {α : Type u_1} (x : List α), [].append x = x
+@[defeq] theorem List.append.eq_2.{u_1} : ∀ {α : Type u_1} (x : List α) (a : α) (as : List α),
+  (a :: as).append x = a :: as.append x
 -/
 #guard_msgs in
 #print equations List.append
diff --git a/tests/lean/setLit.lean.expected.out b/tests/lean/setLit.lean.expected.out
index 78a9c7253c..64485f5db6 100644
--- a/tests/lean/setLit.lean.expected.out
+++ b/tests/lean/setLit.lean.expected.out
@@ -4,10 +4,11 @@ setLit.lean:22:19-22:21: error: overloaded, errors
   
   Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command.
   
-  Fields missing: `data`
+  Fields missing: `bytes`, `isValidUtf8`
   
   Hint: Add missing fields:
-     ̲d̲a̲t̲a̲ ̲:̲=̲ ̲_̲ ̲
+     ̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲
+    ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲t̲f̲8̲ ̲:̲=̲ ̲_̲ ̲
 setLit.lean:24:31-24:38: error: overloaded, errors 
   failed to synthesize
     Singleton Nat String
diff --git a/tests/lean/sizeof.lean.expected.out b/tests/lean/sizeof.lean.expected.out
index 59217b3878..e17ceafea8 100644
--- a/tests/lean/sizeof.lean.expected.out
+++ b/tests/lean/sizeof.lean.expected.out
@@ -4,7 +4,7 @@
 13
 101
 558
-311
-313
+310
+312
 11
 InfTree.node.sizeOf_spec.{u} {α : Type u} [SizeOf α] (children : Nat → InfTree α) : sizeOf (InfTree.node children) = 1
diff --git a/tests/lean/string_imp2.lean b/tests/lean/string_imp2.lean
index 80dff0c48d..e72d2381f4 100644
--- a/tests/lean/string_imp2.lean
+++ b/tests/lean/string_imp2.lean
@@ -20,7 +20,7 @@ it₁.remainingToString ++ "-" ++ it₂.remainingToString
 #eval "αβγ".mkIterator.next.1
 #eval "αβγ".mkIterator.next.next.1
 #eval "αβγ".mkIterator.next.2
-#eval "αβ".1
+#eval "αβ".data
 #eval "αβ".push 'a'
 #eval g "α"
 #eval "".mkIterator.curr