diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 1ab508ca59..1ddbda4e5a 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -33,9 +33,9 @@ theorem List.utf8Encode_append {l l' : List Char} : theorem List.utf8Encode_cons {c : Char} {l : List Char} : (c :: l).utf8Encode = [c].utf8Encode ++ l.utf8Encode := by rw [← singleton_append, List.utf8Encode_append] -theorem List.isUtf8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} : - UInt8.IsUtf8FirstByte [c].utf8Encode[i] ↔ i = 0 := by - simp [List.utf8Encode_singleton, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar] +theorem List.isUTF8FirstByte_getElem_utf8Encode_singleton {c : Char} {i : Nat} {hi : i < [c].utf8Encode.size} : + UInt8.IsUTF8FirstByte [c].utf8Encode[i] ↔ i = 0 := by + simp [List.utf8Encode_singleton, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar] @[simp] theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] := by @@ -45,26 +45,26 @@ theorem String.utf8EncodeChar_ne_nil {c : Char} : String.utf8EncodeChar c ≠ [] theorem List.utf8Encode_eq_empty {l : List Char} : l.utf8Encode = ByteArray.empty ↔ l = [] := by simp [utf8Encode, ← List.eq_nil_iff_forall_not_mem] -theorem ByteArray.isValidUtf8_utf8Encode {l : List Char} : IsValidUtf8 l.utf8Encode := +theorem ByteArray.isValidUTF8_utf8Encode {l : List Char} : IsValidUTF8 l.utf8Encode := .intro l rfl @[simp] -theorem ByteArray.isValidUtf8_empty : IsValidUtf8 ByteArray.empty := +theorem ByteArray.isValidUTF8_empty : IsValidUTF8 ByteArray.empty := .intro [] (by simp) -theorem Char.isValidUtf8_toByteArray_utf8EncodeChar {c : Char} : - ByteArray.IsValidUtf8 (String.utf8EncodeChar c).toByteArray := +theorem Char.isValidUTF8_toByteArray_utf8EncodeChar {c : Char} : + ByteArray.IsValidUTF8 (String.utf8EncodeChar c).toByteArray := .intro [c] (by simp [List.utf8Encode_singleton]) -theorem ByteArray.IsValidUtf8.append {b b' : ByteArray} (h : IsValidUtf8 b) (h' : IsValidUtf8 b') : - IsValidUtf8 (b ++ b') := by +theorem ByteArray.IsValidUTF8.append {b b' : ByteArray} (h : IsValidUTF8 b) (h' : IsValidUTF8 b') : + IsValidUTF8 (b ++ b') := by rcases h with ⟨m, rfl⟩ rcases h' with ⟨m', rfl⟩ exact .intro (m ++ m') (by simp) -theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} : - IsValidUtf8 ([c].utf8Encode ++ b) ↔ IsValidUtf8 b := by - refine ⟨?_, fun h => IsValidUtf8.append isValidUtf8_utf8Encode h⟩ +theorem ByteArray.isValidUTF8_utf8Encode_singleton_append_iff {b : ByteArray} {c : Char} : + IsValidUTF8 ([c].utf8Encode ++ b) ↔ IsValidUTF8 b := by + refine ⟨?_, fun h => IsValidUTF8.append isValidUTF8_utf8Encode h⟩ rintro ⟨l, hl⟩ match l with | [] => simp at hl @@ -75,7 +75,7 @@ theorem ByteArray.isValidUtf8_utf8Encode_singleton_append_iff {b : ByteArray} {c List.utf8DecodeChar?_utf8Encode_cons] using hl rw [← List.singleton_append (l := l), List.utf8Encode_append, ByteArray.append_right_inj] at hl - exact hl ▸ isValidUtf8_utf8Encode + exact hl ▸ isValidUTF8_utf8Encode @[inline, expose] def ByteArray.utf8Decode? (b : ByteArray) : Option (Array Char) := @@ -104,13 +104,13 @@ where if hi : i = b.size then true else - match h : validateUtf8At b i with + match h : validateUTF8At b i with | false => false - | true => go fuel (i + (b[i].utf8ByteSize (isUtf8FirstByte_of_validateUtf8At h)).byteIdx) + | true => go fuel (i + (b[i].utf8ByteSize (isUTF8FirstByte_of_validateUTF8At h)).byteIdx) ?_ ?_ termination_by structural fuel finally - all_goals rw [ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar?] at h + all_goals rw [ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar?] at h · rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)] exact add_utf8Size_utf8DecodeChar_le_size · rw [← ByteArray.utf8Size_utf8DecodeChar (h := h)] @@ -118,7 +118,7 @@ finally have := (b.utf8DecodeChar i h).utf8Size_pos omega -theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel : Nat} +theorem ByteArray.isSome_utf8Decode?Go_eq_validateUTF8Go {b : ByteArray} {fuel : Nat} {i : Nat} {acc : Array Char} {hi : i ≤ b.size} {hf : b.size - i < fuel} : (utf8Decode?.go b fuel i acc hi hf).isSome = validateUTF8.go b fuel i hi hf := by fun_induction utf8Decode?.go with @@ -128,20 +128,20 @@ theorem ByteArray.isSome_utf8Decode?Go_eq_validateUtf8Go {b : ByteArray} {fuel : split · rfl · rename_i heq - simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq + simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq | case3 i acc hi fuel hf h₁ c h₂ ih => simp [validateUTF8.go, h₁] split · rename_i heq - simp [validateUtf8At_eq_isSome_utf8DecodeChar?, h₂] at heq + simp [validateUTF8At_eq_isSome_utf8DecodeChar?, h₂] at heq · rw [ih] congr rw [← ByteArray.utf8Size_utf8DecodeChar (h := by simp [h₂])] simp [utf8DecodeChar, h₂] -theorem ByteArray.isSome_utf8Decode?_eq_validateUtf8 {b : ByteArray} : +theorem ByteArray.isSome_utf8Decode?_eq_validateUTF8 {b : ByteArray} : b.utf8Decode?.isSome = b.validateUTF8 := - b.isSome_utf8Decode?Go_eq_validateUtf8Go + b.isSome_utf8Decode?Go_eq_validateUTF8Go theorem ByteArray.utf8Decode?.go.congr {b b' : ByteArray} {fuel fuel' i i' : Nat} {acc acc' : Array Char} {hi hi' hf hf'} (hbb' : b = b') (hii' : i = i') (hacc : acc = acc') : @@ -172,7 +172,7 @@ theorem ByteArray.utf8Decode?_empty : ByteArray.empty.utf8Decode? = some #[] := simp [utf8Decode?, utf8Decode?.go] private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat} {hi : i ≤ b.size} {hf} {acc : Array Char} : - (ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUtf8 (b.extract i b.size) := by + (ByteArray.utf8Decode?.go b fuel i acc hi hf).isSome ↔ IsValidUTF8 (b.extract i b.size) := by fun_induction ByteArray.utf8Decode?.go with | case1 => simp | case2 fuel i hi hf acc h₁ h₂ => @@ -193,23 +193,23 @@ private theorem ByteArray.isSome_utf8Decode?go_iff {b : ByteArray} {fuel i : Nat rw [ByteArray.extract_eq_extract_append_extract (i := i) (i + c.utf8Size) (by omega) (le_size_of_utf8DecodeChar?_eq_some h₂)] at hl ⊢ rw [ByteArray.append_inj_left hl (by have := le_size_of_utf8DecodeChar?_eq_some h₂; simp; omega), - ← List.utf8Encode_singleton, isValidUtf8_utf8Encode_singleton_append_iff] + ← List.utf8Encode_singleton, isValidUTF8_utf8Encode_singleton_append_iff] theorem ByteArray.isSome_utf8Decode?_iff {b : ByteArray} : - b.utf8Decode?.isSome ↔ IsValidUtf8 b := by + b.utf8Decode?.isSome ↔ IsValidUTF8 b := by rw [utf8Decode?, isSome_utf8Decode?go_iff, extract_zero_size] @[simp] theorem ByteArray.validateUTF8_eq_true_iff {b : ByteArray} : - b.validateUTF8 = true ↔ IsValidUtf8 b := by - rw [← isSome_utf8Decode?_eq_validateUtf8, isSome_utf8Decode?_iff] + b.validateUTF8 = true ↔ IsValidUTF8 b := by + rw [← isSome_utf8Decode?_eq_validateUTF8, isSome_utf8Decode?_iff] @[simp] theorem ByteArray.validateUTF8_eq_false_iff {b : ByteArray} : - b.validateUTF8 = false ↔ ¬ IsValidUtf8 b := by + b.validateUTF8 = false ↔ ¬ IsValidUTF8 b := by simp [← Bool.not_eq_true] -instance {b : ByteArray} : Decidable b.IsValidUtf8 := +instance {b : ByteArray} : Decidable b.IsValidUTF8 := decidable_of_iff (b.validateUTF8 = true) ByteArray.validateUTF8_eq_true_iff /-- @@ -217,7 +217,7 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o the corresponding string. -/ @[inline, expose] -def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUtf8) : String := +def String.fromUTF8 (a : @& ByteArray) (h : a.IsValidUTF8) : String := .ofByteArray a h /-- @@ -225,14 +225,14 @@ Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.o the corresponding string, or returns `none` if the array is not a valid UTF-8 encoding of a string. -/ @[inline, expose] def String.fromUTF8? (a : ByteArray) : Option String := - if h : a.IsValidUtf8 then some (fromUTF8 a h) else none + if h : a.IsValidUTF8 then some (fromUTF8 a h) else none /-- Decodes an array of bytes that encode a string as [UTF-8](https://en.wikipedia.org/wiki/UTF-8) into the corresponding string, or panics if the array is not a valid UTF-8 encoding of a string. -/ @[inline, expose] def String.fromUTF8! (a : ByteArray) : String := - if h : a.IsValidUtf8 then fromUTF8 a h else panic! "invalid UTF-8 string" + if h : a.IsValidUTF8 then fromUTF8 a h else panic! "invalid UTF-8 string" /-- Encodes a string in UTF-8 as an array of bytes. @@ -258,7 +258,7 @@ Examples: @[extern "lean_string_append", expose] def String.append (s : String) (t : @& String) : String where bytes := s.bytes ++ t.bytes - isValidUtf8 := s.isValidUtf8.append t.isValidUtf8 + isValidUTF8 := s.isValidUTF8.append t.isValidUTF8 instance : Append String where append s t := s.append t @@ -294,7 +294,7 @@ theorem List.asString_append {l₁ l₂ : List Char} : (l₁ ++ l₂).asString = @[expose] def String.Internal.toArray (b : String) : Array Char := - b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUtf8) + b.bytes.utf8Decode?.get (b.bytes.isSome_utf8Decode?_iff.2 b.isValidUTF8) @[simp] theorem String.Internal.toArray_empty : String.Internal.toArray "" = #[] := by @@ -556,10 +556,10 @@ Predicate for validity of positions inside a `String`. There are multiple equivalent definitions for validity. We say that a position is valid if the string obtained by taking all of the bytes up to, but -excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_zero`. +excluding, the given position, is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_zero`. Similarly, a position is valid if the string obtained by taking all of the bytes starting at the -given position is valid UTF-8; see `Pos.isValid_iff_isValidUtf8_extract_utf8ByteSize`. +given position is valid UTF-8; see `Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize`. An equivalent condition is that the position is the length of the UTF-8 encoding of some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` and @@ -567,7 +567,7 @@ some prefix of the characters of the string; see `Pos.isValid_iff_exists_append` Another equivalent condition that can be checked efficiently is that the position is either the end position or strictly smaller than the end position and the byte at the position satisfies -`UInt8.IsUtf8FirstByte`; see `Pos.isValid_iff_isUtf8FirstByte`. +`UInt8.IsUTF8FirstByte`; see `Pos.isValid_iff_isUTF8FirstByte`. Examples: * `String.Pos.IsValid "abc" ⟨0⟩` @@ -582,7 +582,7 @@ Examples: -/ structure Pos.Raw.IsValid (s : String) (off : String.Pos.Raw) : Prop where private mk :: le_endPos : off ≤ s.endPos - isValidUtf8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUtf8 + isValidUTF8_extract_zero : (s.bytes.extract 0 off.byteIdx).IsValidUTF8 theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char} (b : ByteArray) (h : l.utf8Encode ++ b = m.utf8Encode) : l <+: m := by @@ -604,8 +604,8 @@ theorem _root_.List.isPrefix_of_utf8Encode_append_eq_utf8Encode {l m : List Char open List in theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) : ∃ m₁ m₂ : List Char, m₁.utf8Encode = s.bytes.extract 0 p.byteIdx ∧ (m₁ ++ m₂).asString = s := by - obtain ⟨l, hl⟩ := s.isValidUtf8 - obtain ⟨m₁, hm₁⟩ := h.isValidUtf8_extract_zero + obtain ⟨l, hl⟩ := s.isValidUTF8 + obtain ⟨m₁, hm₁⟩ := h.isValidUTF8_extract_zero suffices m₁ <+: l by obtain ⟨m₂, rfl⟩ := this refine ⟨m₁, m₂, hm₁.symm, ?_⟩ @@ -616,12 +616,12 @@ theorem Pos.Raw.IsValid.exists {s : String} {p : Pos.Raw} (h : p.IsValid s) : ByteArray.extract_zero_size] simpa using h.le_endPos -theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) : - ByteArray.IsValidUtf8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by +theorem Pos.Raw.IsValid.isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} (h : p.IsValid s) : + ByteArray.IsValidUTF8 (s.bytes.extract p.byteIdx s.utf8ByteSize) := by obtain ⟨m₁, m₂, hm, rfl⟩ := h.exists simp only [List.asString_append, bytes_append, List.bytes_asString] rw [ByteArray.extract_append_eq_right] - · exact ByteArray.isValidUtf8_utf8Encode + · exact ByteArray.isValidUTF8_utf8Encode · rw [hm] simp only [List.asString_append, bytes_append, List.bytes_asString, ByteArray.size_extract, ByteArray.size_append, Nat.sub_zero] @@ -631,7 +631,7 @@ theorem Pos.Raw.IsValid.isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.R theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} : p.IsValid s ↔ ∃ s₁ s₂ : String, s = s₁ ++ s₂ ∧ p = s₁.endPos := by - refine ⟨fun h => ⟨⟨_, h.isValidUtf8_extract_zero⟩, ⟨_, h.isValidUtf8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩ + refine ⟨fun h => ⟨⟨_, h.isValidUTF8_extract_zero⟩, ⟨_, h.isValidUTF8_extract_utf8ByteSize⟩, ?_, ?_⟩, ?_⟩ · apply String.bytes_inj.1 have := Pos.Raw.le_iff.1 h.le_endPos simp_all [← size_bytes] @@ -640,7 +640,7 @@ theorem Pos.Raw.isValid_iff_exists_append {s : String} {p : Pos.Raw} : simp [Nat.min_eq_left this] · rintro ⟨s₁, s₂, rfl, rfl⟩ refine ⟨by simp [Pos.Raw.le_iff], ?_⟩ - simpa [ByteArray.extract_append_eq_left] using s₁.isValidUtf8 + simpa [ByteArray.extract_append_eq_left] using s₁.isValidUTF8 @[simp] theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl @@ -648,12 +648,12 @@ theorem Pos.Raw.byteIdx_zero : (0 : Pos.Raw).byteIdx = 0 := rfl @[simp] theorem Pos.Raw.isValid_zero {s : String} : (0 : Pos.Raw).IsValid s where le_endPos := by simp [Pos.Raw.le_iff] - isValidUtf8_extract_zero := by simp + isValidUTF8_extract_zero := by simp @[simp] theorem Pos.Raw.isValid_endPos {s : String} : s.endPos.IsValid s where le_endPos := by simp [Pos.Raw.le_iff] - isValidUtf8_extract_zero := by simp [← size_bytes, s.isValidUtf8] + isValidUTF8_extract_zero := by simp [← size_bytes, s.isValidUTF8] @[simp] theorem Pos.Raw.isValid_empty_iff {p : Pos.Raw} : p.IsValid "" ↔ p = 0 := by @@ -798,26 +798,21 @@ where /-- Accesses the indicated byte in the UTF-8 encoding of a string. -At runtime, this function is implemented by efficient, constant-time code. --/ -@[extern "lean_string_get_byte_fast", expose] -def getUtf8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 := - s.bytes[p.byteIdx] - -/-- -Accesses the indicated byte in the UTF-8 encoding of a string. - At runtime, this function is implemented by efficient, constant-time code. -/ @[extern "lean_string_get_byte_fast", expose] def getUTF8Byte (s : @& String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 := s.bytes[p.byteIdx] +@[deprecated getUTF8Byte (since := "2025-10-01"), extern "lean_string_get_byte_fast", expose] +abbrev getUtf8Byte (s : String) (p : Pos.Raw) (h : p < s.endPos) : UInt8 := + s.getUTF8Byte p h + @[simp] theorem endPos_empty : "".endPos = 0 := rfl -theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} : - p.IsValid s ↔ p = s.endPos ∨ ∃ (h : p < s.endPos), (s.getUtf8Byte p h).IsUtf8FirstByte := by +theorem Pos.Raw.isValid_iff_isUTF8FirstByte {s : String} {p : Pos.Raw} : + p.IsValid s ↔ p = s.endPos ∨ ∃ (h : p < s.endPos), (s.getUTF8Byte p h).IsUTF8FirstByte := by induction s using push_induction with | empty => simp [Pos.Raw.lt_iff] | push s c ih => @@ -825,22 +820,22 @@ theorem Pos.Raw.isValid_iff_isUtf8FirstByte {s : String} {p : Pos.Raw} : refine ⟨?_, ?_⟩ · rintro ((rfl|⟨h, hb⟩)|h) · refine Or.inr ⟨by simp [Pos.Raw.lt_iff, Char.utf8Size_pos], ?_⟩ - simp only [getUtf8Byte, bytes_push, byteIdx_endPos] + simp only [getUTF8Byte, bytes_push, byteIdx_endPos] rw [ByteArray.getElem_append_right (by simp)] - simp [List.isUtf8FirstByte_getElem_utf8Encode_singleton] + simp [List.isUTF8FirstByte_getElem_utf8Encode_singleton] · refine Or.inr ⟨by simp [lt_iff] at h ⊢; omega, ?_⟩ - simp only [getUtf8Byte, bytes_push] - rwa [ByteArray.getElem_append_left, ← getUtf8Byte] + simp only [getUTF8Byte, bytes_push] + rwa [ByteArray.getElem_append_left, ← getUTF8Byte] · exact Or.inl (by simpa [endPos_push]) · rintro (h|⟨h, hb⟩) · exact Or.inr (by simpa [endPos_push] using h) - · simp only [getUtf8Byte, bytes_push] at hb + · simp only [getUTF8Byte, bytes_push] at hb by_cases h' : p < s.endPos · refine Or.inl (Or.inr ⟨h', ?_⟩) - rwa [ByteArray.getElem_append_left h', ← getUtf8Byte] at hb + rwa [ByteArray.getElem_append_left h', ← getUTF8Byte] at hb · refine Or.inl (Or.inl ?_) rw [ByteArray.getElem_append_right (by simp [lt_iff] at h' ⊢; omega)] at hb - simp only [size_bytes, List.isUtf8FirstByte_getElem_utf8Encode_singleton] at hb + simp only [size_bytes, List.isUTF8FirstByte_getElem_utf8Encode_singleton] at hb ext simp only [lt_iff, byteIdx_endPos, Nat.not_lt] at ⊢ h' omega @@ -865,13 +860,13 @@ Examples: @[extern "lean_string_is_valid_pos", expose] def Pos.Raw.isValid (s : @&String) (p : @& Pos.Raw) : Bool := if h : p < s.endPos then - (s.getUtf8Byte p h).IsUtf8FirstByte + (s.getUTF8Byte p h).IsUTF8FirstByte else p = s.endPos @[simp] theorem Pos.Raw.isValid_eq_true_iff {s : String} {p : Pos.Raw} : p.isValid s = true ↔ p.IsValid s := by - rw [isValid_iff_isUtf8FirstByte] + rw [isValid_iff_isUTF8FirstByte] fun_cases isValid s p with | case1 h => simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self] @@ -899,29 +894,29 @@ theorem Pos.Raw.isValid_iff_isSome_utf8DecodeChar? {s : String} {p : Pos.Raw} : simp · exact Or.inr (ByteArray.isSome_utf8DecodeChar?_append h _) · simp [endPos_push] - · refine isValid_iff_isUtf8FirstByte.2 (Or.inr ?_) + · refine isValid_iff_isUTF8FirstByte.2 (Or.inr ?_) obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h refine ⟨?_, ?_⟩ · have := ByteArray.le_size_of_utf8DecodeChar?_eq_some hc have := c.utf8Size_pos simp only [lt_iff, byteIdx_endPos, gt_iff_lt, ← size_bytes] omega - · rw [getUtf8Byte] - exact ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h + · rw [getUTF8Byte] + exact ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h -theorem _root_.ByteArray.IsValidUtf8.isUtf8FirstByte_getElem_zero {b : ByteArray} - (h : b.IsValidUtf8) (h₀ : 0 < b.size) : b[0].IsUtf8FirstByte := by +theorem _root_.ByteArray.IsValidUTF8.isUTF8FirstByte_getElem_zero {b : ByteArray} + (h : b.IsValidUTF8) (h₀ : 0 < b.size) : b[0].IsUTF8FirstByte := by rcases h with ⟨m, rfl⟩ have : m ≠ [] := by rintro rfl simp at h₀ conv => congr; congr; rw [← List.cons_head_tail this, ← List.singleton_append, List.utf8Encode_append] rw [ByteArray.getElem_append_left] - · exact List.isUtf8FirstByte_getElem_utf8Encode_singleton.2 rfl + · exact List.isUTF8FirstByte_getElem_utf8Encode_singleton.2 rfl · simp [List.utf8Encode_singleton, Char.utf8Size_pos] -theorem isUtf8FirstByte_getUtf8Byte_zero {b : String} {h} : (b.getUtf8Byte 0 h).IsUtf8FirstByte := - b.isValidUtf8.isUtf8FirstByte_getElem_zero _ +theorem isUTF8FirstByte_getUTF8Byte_zero {b : String} {h} : (b.getUTF8Byte 0 h).IsUTF8FirstByte := + b.isValidUTF8.isUTF8FirstByte_getElem_zero _ protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a ≤ c := by simpa [le_iff] using Nat.le_trans @@ -929,43 +924,43 @@ protected theorem Pos.Raw.le_trans {a b c : Pos.Raw} : a ≤ b → b ≤ c → a protected theorem Pos.Raw.lt_of_lt_of_le {a b c : Pos.Raw} : a < b → b ≤ c → a < c := by simpa [le_iff, lt_iff] using Nat.lt_of_lt_of_le -theorem Pos.Raw.isValidUtf8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) : - (s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUtf8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by +theorem Pos.Raw.isValidUTF8_extract_iff {s : String} (p₁ p₂ : Pos.Raw) (hle : p₁ ≤ p₂) (hle' : p₂ ≤ s.endPos) : + (s.bytes.extract p₁.byteIdx p₂.byteIdx).IsValidUTF8 ↔ p₁ = p₂ ∨ (p₁.IsValid s ∧ p₂.IsValid s) := by have hle'' : p₂.byteIdx ≤ s.bytes.size := by simpa [le_iff] using hle' refine ⟨fun h => Classical.or_iff_not_imp_left.2 (fun h' => ?_), ?_⟩ · have hlt : p₁ < p₂ := by simp_all [le_iff, lt_iff, Pos.Raw.ext_iff] omega have h₁ : p₁.IsValid s := by - rw [isValid_iff_isUtf8FirstByte] + rw [isValid_iff_isUTF8FirstByte] refine Or.inr ⟨Pos.Raw.lt_of_lt_of_le hlt hle', ?_⟩ have hlt' : 0 < p₂.byteIdx - p₁.byteIdx := by simp [lt_iff] at hlt omega - have := h.isUtf8FirstByte_getElem_zero + have := h.isUTF8FirstByte_getElem_zero simp only [ByteArray.size_extract, Nat.min_eq_left hle'', hlt', ByteArray.getElem_extract, Nat.add_zero] at this - simp [getUtf8Byte, this trivial] + simp [getUTF8Byte, this trivial] refine ⟨h₁, ⟨hle', ?_⟩⟩ rw [ByteArray.extract_eq_extract_append_extract p₁.byteIdx (by simp) hle] - exact h₁.isValidUtf8_extract_zero.append h + exact h₁.isValidUTF8_extract_zero.append h · refine fun h => h.elim (by rintro rfl; simp) (fun ⟨h₁, h₂⟩ => ?_) - let t : String := ⟨_, h₂.isValidUtf8_extract_zero⟩ + let t : String := ⟨_, h₂.isValidUTF8_extract_zero⟩ have htb : t.bytes = s.bytes.extract 0 p₂.byteIdx := rfl have ht : p₁.IsValid t := by refine ⟨?_, ?_⟩ · simpa [le_iff, t, Nat.min_eq_left hle'', ← size_bytes] - · simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUtf8_extract_zero + · simpa [htb, ByteArray.extract_extract, Nat.min_eq_left (le_iff.1 hle)] using h₁.isValidUTF8_extract_zero simpa [htb, ByteArray.extract_extract, Nat.zero_add, Nat.min_eq_left hle'', ← size_bytes] - using ht.isValidUtf8_extract_utf8ByteSize + using ht.isValidUTF8_extract_utf8ByteSize -theorem Pos.Raw.isValid_iff_isValidUtf8_extract_zero {s : String} {p : Pos.Raw} : - p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUtf8 := +theorem Pos.Raw.isValid_iff_isValidUTF8_extract_zero {s : String} {p : Pos.Raw} : + p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract 0 p.byteIdx).IsValidUTF8 := ⟨fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩, fun ⟨h₁, h₂⟩ => ⟨h₁, h₂⟩⟩ -theorem Pos.Raw.isValid_iff_isValidUtf8_extract_utf8ByteSize {s : String} {p : Pos.Raw} : - p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUtf8 := by - refine ⟨fun h => ⟨h.le_endPos, h.isValidUtf8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩ - rw [← byteIdx_endPos, isValidUtf8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂ +theorem Pos.Raw.isValid_iff_isValidUTF8_extract_utf8ByteSize {s : String} {p : Pos.Raw} : + p.IsValid s ↔ p ≤ s.endPos ∧ (s.bytes.extract p.byteIdx s.utf8ByteSize).IsValidUTF8 := by + refine ⟨fun h => ⟨h.le_endPos, h.isValidUTF8_extract_utf8ByteSize⟩, fun ⟨h₁, h₂⟩ => ?_⟩ + rw [← byteIdx_endPos, isValidUTF8_extract_iff _ _ h₁ (by simp [le_iff])] at h₂ obtain (rfl|h₂) := h₂ · simp · exact h₂.1 @@ -1000,12 +995,12 @@ def endValidPos (s : String) : s.ValidPos where offset := s.endPos isValid := by simp -theorem ValidPos.isValidUtf8_extract {s : String} (pos₁ pos₂ : s.ValidPos) : - (s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUtf8 := by +theorem ValidPos.isValidUTF8_extract {s : String} (pos₁ pos₂ : s.ValidPos) : + (s.bytes.extract pos₁.offset.byteIdx pos₂.offset.byteIdx).IsValidUTF8 := by by_cases h : pos₁.offset ≤ pos₂.offset - · exact (Pos.Raw.isValidUtf8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩) + · exact (Pos.Raw.isValidUTF8_extract_iff _ _ h pos₂.isValid.le_endPos).2 (Or.inr ⟨pos₁.isValid, pos₂.isValid⟩) · rw [ByteArray.extract_eq_empty_iff.2] - · exact ByteArray.isValidUtf8_empty + · exact ByteArray.isValidUTF8_empty · rw [Nat.min_eq_left] · rw [Pos.Raw.le_iff] at h omega @@ -1063,8 +1058,8 @@ Accesses the indicated byte in the UTF-8 encoding of a string slice. At runtime, this function is implemented by efficient, constant-time code. -/ @[inline, expose] -def Slice.getUtf8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 := - s.str.getUtf8Byte (s.startInclusive.offset + p) (by +def Slice.getUTF8Byte (s : Slice) (p : Pos.Raw) (h : p < s.utf8ByteSize) : UInt8 := + s.str.getUTF8Byte (s.startInclusive.offset + p) (by have := s.endExclusive.isValid.le_endPos simp only [Pos.Raw.lt_iff, byteIdx_utf8ByteSize, Pos.Raw.le_iff, byteIdx_endPos, Pos.Raw.byteIdx_add] at * omega) @@ -1074,16 +1069,16 @@ Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics is out-of-bounds. -/ @[expose] -def Slice.getUtf8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 := +def Slice.getUTF8Byte! (s : Slice) (p : String.Pos.Raw) : UInt8 := if h : p < s.utf8ByteSize then - s.getUtf8Byte p h + s.getUTF8Byte p h else panic! "String slice access is out of bounds." @[extern "lean_string_utf8_extract"] def ValidPos.extract {s : @& String} (b e : @& s.ValidPos) : String where bytes := s.bytes.extract b.offset.byteIdx e.offset.byteIdx - isValidUtf8 := b.isValidUtf8_extract e + isValidUTF8 := b.isValidUTF8_extract e /-- Creates a `String` from a `String.Slice` by copying the bytes. -/ @[inline] @@ -1103,17 +1098,17 @@ theorem Slice.utf8ByteSize_copy {s : Slice} : theorem Slice.endPos_copy {s : Slice} : s.copy.endPos = s.utf8ByteSize := by simp [Pos.Raw.ext_iff] -theorem Slice.getUtf8Byte_eq_getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} : - s.getUtf8Byte p h = s.copy.getUtf8Byte p (by simpa) := by - simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract] +theorem Slice.getUTF8Byte_eq_getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h : p < s.utf8ByteSize} : + s.getUTF8Byte p h = s.copy.getUTF8Byte p (by simpa) := by + simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract] -theorem Slice.getUtf8Byte_copy {s : Slice} {p : Pos.Raw} {h} : - s.copy.getUtf8Byte p h = s.getUtf8Byte p (by simpa using h) := by - rw [getUtf8Byte_eq_getUtf8Byte_copy] +theorem Slice.getUTF8Byte_copy {s : Slice} {p : Pos.Raw} {h} : + s.copy.getUTF8Byte p h = s.getUTF8Byte p (by simpa using h) := by + rw [getUTF8Byte_eq_getUTF8Byte_copy] -theorem Slice.isUtf8FirstByte_utf8ByteAt_zero {s : Slice} {h} : - (s.getUtf8Byte 0 h).IsUtf8FirstByte := by - simpa [getUtf8Byte_eq_getUtf8Byte_copy] using s.copy.isUtf8FirstByte_getUtf8Byte_zero +theorem Slice.isUTF8FirstByte_utf8ByteAt_zero {s : Slice} {h} : + (s.getUTF8Byte 0 h).IsUTF8FirstByte := by + simpa [getUTF8Byte_eq_getUTF8Byte_copy] using s.copy.isUTF8FirstByte_getUTF8Byte_zero @[simp] theorem Pos.Raw.add_zero {p : Pos.Raw} : p + 0 = p := by @@ -1127,7 +1122,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} : · have := s.startInclusive_le_endExclusive simp_all only [Slice.endPos_copy, le_iff, Slice.byteIdx_utf8ByteSize] rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] at h₂ - rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff] at h₂ + rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] at h₂ · rcases h₂ with (h₂|⟨-, h₂⟩) · rw [← h₂] exact s.startInclusive.isValid @@ -1140,7 +1135,7 @@ theorem Pos.Raw.isValid_copy_iff {s : Slice} {p : Pos.Raw} : · have := s.startInclusive_le_endExclusive simp_all only [le_iff, Slice.byteIdx_utf8ByteSize] rw [Slice.bytes_copy, ByteArray.extract_extract, Nat.add_zero, Nat.min_eq_left (by omega)] - rw [← byteIdx_add, Pos.Raw.isValidUtf8_extract_iff] + rw [← byteIdx_add, Pos.Raw.isValidUTF8_extract_iff] · exact Or.inr ⟨s.startInclusive.isValid, h₂⟩ · simp [le_iff] · have := s.endExclusive.isValid.le_endPos @@ -1186,22 +1181,22 @@ def Slice.endPos (s : Slice) : s.Pos where @[simp] theorem ByteString.Slice.offset_endPos {s : Slice} : s.endPos.offset = s.utf8ByteSize := (rfl) -theorem Pos.Raw.isValidForSlice_iff_isUtf8FirstByte {s : Slice} {p : Pos.Raw} : - p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUtf8Byte p h).IsUtf8FirstByte)) := by - simp [← isValid_copy_iff, isValid_iff_isUtf8FirstByte, Slice.getUtf8Byte_copy] +theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} : + p.IsValidForSlice s ↔ (p = s.utf8ByteSize ∨ (∃ (h : p < s.utf8ByteSize), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by + simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy] /-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/ @[expose] def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool := if h : p < s.utf8ByteSize then - (s.getUtf8Byte p h).IsUtf8FirstByte + (s.getUTF8Byte p h).IsUTF8FirstByte else p = s.utf8ByteSize @[simp] theorem Pos.Raw.isValidForSlice_eq_true_iff {s : Slice} {p : Pos.Raw} : p.isValidForSlice s = true ↔ p.IsValidForSlice s := by - rw [isValidForSlice_iff_isUtf8FirstByte] + rw [isValidForSlice_iff_isUTF8FirstByte] fun_cases isValidForSlice with | case1 h => simp_all only [decide_eq_true_eq, exists_true_left, iff_or_self] @@ -1257,22 +1252,22 @@ theorem Pos.Raw.isValidForSlice_iff_isSome_utf8DecodeChar? {s : Slice} {p : Pos. simp only [size_bytes, Slice.utf8ByteSize_copy, ge_iff_le] omega · simp - · rw [isValidForSlice_iff_isUtf8FirstByte] + · rw [isValidForSlice_iff_isUTF8FirstByte] rintro (rfl|⟨h₁, h₂⟩) · simp - · exact Or.inr ⟨h₁, ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? h₂⟩ + · exact Or.inr ⟨h₁, ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? h₂⟩ /-- Returns the byte at a position in a slice that is not the end position. -/ @[inline, expose] def Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : UInt8 := - s.getUtf8Byte pos.offset (by + s.getUTF8Byte pos.offset (by have := pos.isValidForSlice.le_utf8ByteSize simp_all [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff, Pos.Raw.lt_iff] omega) -theorem Slice.Pos.isUtf8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} : - (pos.byte h).IsUtf8FirstByte := - ((Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2)) +theorem Slice.Pos.isUTF8FirstByte_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} : + (pos.byte h).IsUTF8FirstByte := + ((Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.1 pos.isValidForSlice).elim (fun h' => (h (Pos.ext h')).elim) (·.2)) /-- Given a valid position on a slice `s`, obtains the corresponding valid position on the underlying string `s.str`. -/ @@ -1687,7 +1682,7 @@ theorem Slice.Pos.get_eq_get_toCopy {s : Slice} {pos : s.Pos} {h} : theorem Slice.Pos.byte_toCopy {s : Slice} {pos : s.Pos} (h) : pos.toCopy.byte h = pos.byte (by rintro rfl; simp at h) := by rw [ValidPos.byte, Slice.Pos.byte, Slice.Pos.byte] - simp [getUtf8Byte, String.getUtf8Byte, bytes_copy, ByteArray.getElem_extract] + simp [getUTF8Byte, String.getUTF8Byte, bytes_copy, ByteArray.getElem_extract] theorem Slice.Pos.byte_eq_byte_toCopy {s : Slice} {pos : s.Pos} {h} : pos.byte h = pos.toCopy.byte (ne_of_apply_ne ValidPos.ofCopy (by simp [h])) := @@ -1745,14 +1740,14 @@ theorem Slice.Pos.copy_eq_append_get {s : Slice} {pos : s.Pos} (h : pos ≠ s.en rw [append_assoc, ← ht₂, ← copy_eq_copy_replaceEnd] theorem Slice.Pos.utf8ByteSize_byte {s : Slice} {pos : s.Pos} {h : pos ≠ s.endPos} : - (pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by - simp [getUtf8Byte, byte, String.getUtf8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar] + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte = ⟨(pos.get h).utf8Size⟩ := by + simp [getUTF8Byte, byte, String.getUTF8Byte, get_eq_utf8DecodeChar, ByteArray.utf8Size_utf8DecodeChar] /-- Advances a valid position on a slice to the next valid position, given a proof that the position is not the past-the-end position, which guarantees that such a position exists. -/ @[expose] def Slice.Pos.next {s : Slice} (pos : s.Pos) (h : pos ≠ s.endPos) : s.Pos where - offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUtf8FirstByte_byte + offset := pos.offset + (pos.byte h).utf8ByteSize pos.isUTF8FirstByte_byte isValidForSlice := by obtain ⟨t₁, t₂, ht, ht'⟩ := copy_eq_append_get h replace ht' : pos.offset = ⟨t₁.utf8ByteSize⟩ := Eq.symm (String.Pos.Raw.ext ht') @@ -1802,13 +1797,13 @@ def Slice.Pos.prevAux {s : Slice} (pos : s.Pos) (h : pos ≠ s.startPos) : Strin omega) where go (off : Nat) (h₁ : ⟨off⟩ < s.utf8ByteSize) : String.Pos.Raw := - if hbyte : (s.getUtf8Byte ⟨off⟩ h₁).IsUtf8FirstByte then + if hbyte : (s.getUTF8Byte ⟨off⟩ h₁).IsUTF8FirstByte then ⟨off⟩ else have : 0 ≠ off := by intro h obtain hoff : (⟨off⟩ : String.Pos.Raw) = 0 := by simpa [String.Pos.Raw.ext_iff] using h.symm - simp [hoff, s.isUtf8FirstByte_utf8ByteAt_zero] at hbyte + simp [hoff, s.isUTF8FirstByte_utf8ByteAt_zero] at hbyte match off with | 0 => False.elim (by contradiction) | off + 1 => go off (by simp [Pos.Raw.lt_iff] at ⊢ h₁; omega) @@ -1820,12 +1815,12 @@ theorem Pos.Raw.isValidForSlice_prevAuxGo {s : Slice} (off : Nat) (h₁ : ⟨off | zero => rw [Slice.Pos.prevAux.go] split - · exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, ‹_›⟩) + · exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩) · simpa using elim | succ off ih => rw [Slice.Pos.prevAux.go] split - · exact Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, ‹_›⟩) + · exact Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, ‹_›⟩) · simpa using ih _ where elim {P : Pos.Raw → Prop} {h : False} : P h.elim := h.elim @@ -1965,8 +1960,8 @@ def Slice.findNextPos (offset : String.Pos.Raw) (s : Slice) (_h : offset < s.utf where go (offset : String.Pos.Raw) : s.Pos := if h : offset < s.utf8ByteSize then - if h' : (s.getUtf8Byte offset h).IsUtf8FirstByte then - s.pos offset (Pos.Raw.isValidForSlice_iff_isUtf8FirstByte.2 (Or.inr ⟨_, h'⟩)) + if h' : (s.getUTF8Byte offset h).IsUTF8FirstByte then + s.pos offset (Pos.Raw.isValidForSlice_iff_isUTF8FirstByte.2 (Or.inr ⟨_, h'⟩)) else go offset.inc else diff --git a/src/Init/Data/String/Decode.lean b/src/Init/Data/String/Decode.lean index 4b58e9eee0..381e0c7471 100644 --- a/src/Init/Data/String/Decode.lean +++ b/src/Init/Data/String/Decode.lean @@ -982,7 +982,7 @@ end ByteArray.utf8DecodeChar? open ByteArray.utf8DecodeChar? -/- # `utf8DecodeChar?` and `validateUtf8At` -/ +/- # `utf8DecodeChar?` and `validateUTF8At` -/ @[inline, expose] public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char := @@ -1006,7 +1006,7 @@ public def ByteArray.utf8DecodeChar? (bytes : ByteArray) (i : Nat) : Option Char else none @[inline, expose] -public def ByteArray.validateUtf8At (bytes : ByteArray) (i : Nat) : Bool := +public def ByteArray.validateUTF8At (bytes : ByteArray) (i : Nat) : Bool := if h₀ : i < bytes.size then match h : parseFirstByte bytes[i] with | .invalid => false @@ -1212,9 +1212,9 @@ public theorem String.toByteArray_utf8EncodeChar_of_utf8DecodeChar?_eq_some {b : rw [← assemble₄_eq_some_iff_utf8EncodeChar_eq] exact ⟨this, h⟩ -public theorem ByteArray.validateUtf8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} : - b.validateUtf8At i = (b.utf8DecodeChar? i).isSome := by - simp only [validateUtf8At, utf8DecodeChar?] +public theorem ByteArray.validateUTF8At_eq_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} : + b.validateUTF8At i = (b.utf8DecodeChar? i).isSome := by + simp only [validateUTF8At, utf8DecodeChar?] split · split <;> (try split) <;> simp [verify₁_eq_isSome_assemble₁, verify₂_eq_isSome_assemble₂, verify₃_eq_isSome_assemble₃, verify₄_eq_isSome_assemble₄] @@ -1285,9 +1285,9 @@ public theorem ByteArray.lt_size_of_isSome_utf8DecodeChar? {b : ByteArray} {i : have := c.utf8Size_pos omega -public theorem ByteArray.lt_size_of_validateUtf8At {b : ByteArray} {i : Nat} : - validateUtf8At b i = true → i < b.size := - validateUtf8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar? +public theorem ByteArray.lt_size_of_validateUTF8At {b : ByteArray} {i : Nat} : + validateUTF8At b i = true → i < b.size := + validateUTF8At_eq_isSome_utf8DecodeChar? ▸ lt_size_of_isSome_utf8DecodeChar? public theorem ByteArray.utf8DecodeChar?_append_eq_some {b : ByteArray} {i : Nat} {c : Char} (h : utf8DecodeChar? b i = some c) (b' : ByteArray) : utf8DecodeChar? (b ++ b') i = some c := by @@ -1366,7 +1366,7 @@ public theorem List.utf8DecodeChar_utf8Encode_cons {l : List Char} {c : Char} {h ByteArray.utf8DecodeChar (c::l).utf8Encode 0 h = c := by simp [ByteArray.utf8DecodeChar] -/-! # `UInt8.IsUtf8FirstByte` -/ +/-! # `UInt8.IsUTF8FirstByte` -/ namespace UInt8 @@ -1375,27 +1375,27 @@ Predicate for whether a byte can appear as the first byte of the UTF-8 encoding scalar value. -/ @[expose] -public def IsUtf8FirstByte (c : UInt8) : Prop := +public def IsUTF8FirstByte (c : UInt8) : Prop := c &&& 0x80 = 0 ∨ c &&& 0xe0 = 0xc0 ∨ c &&& 0xf0 = 0xe0 ∨ c &&& 0xf8 = 0xf0 -public instance {c : UInt8} : Decidable c.IsUtf8FirstByte := +public instance {c : UInt8} : Decidable c.IsUTF8FirstByte := inferInstanceAs <| Decidable (c &&& 0x80 = 0 ∨ c &&& 0xe0 = 0xc0 ∨ c &&& 0xf0 = 0xe0 ∨ c &&& 0xf8 = 0xf0) -theorem isUtf8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} : - c.IsUtf8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by - fun_cases parseFirstByte with simp_all [IsUtf8FirstByte] +theorem isUTF8FirstByte_iff_parseFirstByte_ne_invalid {c : UInt8} : + c.IsUTF8FirstByte ↔ parseFirstByte c ≠ FirstByte.invalid := by + fun_cases parseFirstByte with simp_all [IsUTF8FirstByte] @[simp] -public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} : - (String.utf8EncodeChar c)[i].IsUtf8FirstByte ↔ i = 0 := by +public theorem isUTF8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : i < (String.utf8EncodeChar c).length} : + (String.utf8EncodeChar c)[i].IsUTF8FirstByte ↔ i = 0 := by obtain (rfl|hi₀) := Nat.eq_zero_or_pos i - · simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, iff_true] + · simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, iff_true] match h : c.utf8Size, c.utf8Size_pos, c.utf8Size_le_four with | 1, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_done h] | 2, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_oneMore h] | 3, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_twoMore h] | 4, _, _ => simp [parseFirstByte_utf8EncodeChar_eq_threeMore h] - · simp only [isUtf8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false, + · simp only [isUTF8FirstByte_iff_parseFirstByte_ne_invalid, ne_eq, Nat.ne_of_lt' hi₀, iff_false, Classical.not_not] apply parseFirstByte_eq_invalid_of_isInvalidContinuationByte_eq_false simp only [String.length_utf8EncodeChar] at hi @@ -1408,12 +1408,12 @@ public theorem isUtf8FirstByte_getElem_utf8EncodeChar {c : Char} {i : Nat} {hi : | 4, _, _, 2, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_two_of_utf8Size_eq_four h] | 4, _, _, 3, _, _ => simp [isInvalidContinuationByte_getElem_utf8EncodeChar_three_of_utf8Size_eq_four h] -public theorem isUtf8FirstByte_getElem_zero_utf8EncodeChar {c : Char} : - ((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUtf8FirstByte := by +public theorem isUTF8FirstByte_getElem_zero_utf8EncodeChar {c : Char} : + ((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).IsUTF8FirstByte := by simp @[expose] -public def utf8ByteSize (c : UInt8) (_h : c.IsUtf8FirstByte) : String.Pos.Raw := +public def utf8ByteSize (c : UInt8) (_h : c.IsUTF8FirstByte) : String.Pos.Raw := if c &&& 0x80 = 0 then ⟨1⟩ else if c &&& 0xe0 = 0xc0 then @@ -1430,7 +1430,7 @@ def _root_.ByteArray.utf8DecodeChar?.FirstByte.utf8ByteSize : FirstByte → Stri | .twoMore => ⟨3⟩ | .threeMore => ⟨4⟩ -theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8FirstByte} : +theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUTF8FirstByte} : c.utf8ByteSize h = (parseFirstByte c).utf8ByteSize := by simp only [utf8ByteSize, FirstByte.utf8ByteSize, parseFirstByte, beq_iff_eq] split @@ -1447,28 +1447,28 @@ theorem utf8ByteSize_eq_utf8ByteSize_parseFirstByte {c : UInt8} {h : c.IsUtf8Fir end UInt8 -public theorem ByteArray.isUtf8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} : - (((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUtf8FirstByte := by +public theorem ByteArray.isUTF8FirstByte_getElem_zero_utf8EncodeChar_append {c : Char} {b : ByteArray} : + (((String.utf8EncodeChar c).toByteArray ++ b)[0]'(by simp; have := c.utf8Size_pos; omega)).IsUTF8FirstByte := by rw [ByteArray.getElem_append_left (by simp [c.utf8Size_pos]), - List.getElem_toByteArray, UInt8.isUtf8FirstByte_getElem_utf8EncodeChar] + List.getElem_toByteArray, UInt8.isUTF8FirstByte_getElem_utf8EncodeChar] -public theorem ByteArray.isUtf8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} - (h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte := by +public theorem ByteArray.isUTF8FirstByte_of_isSome_utf8DecodeChar? {b : ByteArray} {i : Nat} + (h : (utf8DecodeChar? b i).isSome) : (b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte := by rw [utf8DecodeChar?_eq_utf8DecodeChar?_extract] at h - suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUtf8FirstByte by + suffices ((b.extract i b.size)[0]'(lt_size_of_isSome_utf8DecodeChar? h)).IsUTF8FirstByte by simpa [ByteArray.getElem_extract, Nat.add_zero] using this obtain ⟨c, hc⟩ := Option.isSome_iff_exists.1 h conv => congr; congr; rw [eq_of_utf8DecodeChar?_eq_some hc] - exact isUtf8FirstByte_getElem_zero_utf8EncodeChar_append + exact isUTF8FirstByte_getElem_zero_utf8EncodeChar_append -public theorem ByteArray.isUtf8FirstByte_of_validateUtf8At {b : ByteArray} {i : Nat} : - (h : validateUtf8At b i = true) → (b[i]'(lt_size_of_validateUtf8At h)).IsUtf8FirstByte := by - simp only [validateUtf8At_eq_isSome_utf8DecodeChar?] - exact isUtf8FirstByte_of_isSome_utf8DecodeChar? +public theorem ByteArray.isUTF8FirstByte_of_validateUTF8At {b : ByteArray} {i : Nat} : + (h : validateUTF8At b i = true) → (b[i]'(lt_size_of_validateUTF8At h)).IsUTF8FirstByte := by + simp only [validateUTF8At_eq_isSome_utf8DecodeChar?] + exact isUTF8FirstByte_of_isSome_utf8DecodeChar? theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} : (((String.utf8EncodeChar c)[0]'(by simp [c.utf8Size_pos])).utf8ByteSize - UInt8.isUtf8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by + UInt8.isUTF8FirstByte_getElem_zero_utf8EncodeChar).byteIdx = c.utf8Size := by rw [UInt8.utf8ByteSize_eq_utf8ByteSize_parseFirstByte] obtain (hc|hc|hc|hc) := c.utf8Size_eq · rw [parseFirstByte_utf8EncodeChar_eq_done hc, FirstByte.utf8ByteSize, hc] @@ -1478,7 +1478,7 @@ theorem Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar {c : Char} : public theorem ByteArray.utf8Size_utf8DecodeChar {b : ByteArray} {i} {h} : (utf8DecodeChar b i h).utf8Size = - ((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUtf8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by + ((b[i]'(lt_size_of_isSome_utf8DecodeChar? h)).utf8ByteSize (isUTF8FirstByte_of_isSome_utf8DecodeChar? h)).byteIdx := by rw [← Char.byteIdx_utf8ByteSize_getElem_utf8EncodeChar] simp only [List.getElem_eq_getElem_toByteArray, utf8EncodeChar_utf8DecodeChar] simp [ByteArray.getElem_extract] diff --git a/src/Init/Data/String/Pattern/Basic.lean b/src/Init/Data/String/Pattern/Basic.lean index 904e4e2a30..f14484195c 100644 --- a/src/Init/Data/String/Pattern/Basic.lean +++ b/src/Init/Data/String/Pattern/Basic.lean @@ -88,7 +88,7 @@ where have hr := by simp [Pos.Raw.le_iff] at h h2 ⊢ omega - if lhs.getUtf8Byte (lstart + curr) hl == rhs.getUtf8Byte (rstart + curr) hr then + if lhs.getUTF8Byte (lstart + curr) hl == rhs.getUTF8Byte (rstart + curr) hr then go curr.inc else false diff --git a/src/Init/Data/String/Pattern/String.lean b/src/Init/Data/String/Pattern/String.lean index 2ae54796cc..66d598b12f 100644 --- a/src/Init/Data/String/Pattern/String.lean +++ b/src/Init/Data/String/Pattern/String.lean @@ -39,16 +39,16 @@ partial def buildTable (pat : Slice) : Array String.Pos.Raw := where go (pos : String.Pos.Raw) (table : Array String.Pos.Raw) := if h : pos < pat.utf8ByteSize then - let patByte := pat.getUtf8Byte pos h + let patByte := pat.getUTF8Byte pos h let distance := computeDistance table[table.size - 1]! patByte table - let distance := if patByte = pat.getUtf8Byte! distance then distance.inc else distance + let distance := if patByte = pat.getUTF8Byte! distance then distance.inc else distance go pos.inc (table.push distance) else table computeDistance (distance : String.Pos.Raw) (patByte : UInt8) (table : Array String.Pos.Raw) : String.Pos.Raw := - if distance > 0 && patByte != pat.getUtf8Byte! distance then + if distance > 0 && patByte != pat.getUTF8Byte! distance then computeDistance table[distance.byteIdx - 1]! patByte table else distance @@ -62,7 +62,7 @@ def iter (s : Slice) (pat : Slice) : Std.Iter (α := ForwardSliceSearcher s) (Se partial def backtrackIfNecessary (pat : Slice) (table : Array String.Pos.Raw) (stackByte : UInt8) (needlePos : String.Pos.Raw) : String.Pos.Raw := - if needlePos != 0 && stackByte != pat.getUtf8Byte! needlePos then + if needlePos != 0 && stackByte != pat.getUTF8Byte! needlePos then backtrackIfNecessary pat table stackByte table[needlePos.byteIdx - 1]! else needlePos @@ -95,9 +95,9 @@ instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (Searc let rec findNext (startPos : String.Pos.Raw) (currStackPos : String.Pos.Raw) (needlePos : String.Pos.Raw) (h : stackPos ≤ currStackPos) := if h1 : currStackPos < s.utf8ByteSize then - let stackByte := s.getUtf8Byte currStackPos h1 + let stackByte := s.getUTF8Byte currStackPos h1 let needlePos := backtrackIfNecessary needle table stackByte needlePos - let patByte := needle.getUtf8Byte! needlePos + let patByte := needle.getUTF8Byte! needlePos if stackByte != patByte then let nextStackPos := s.findNextPos currStackPos h1 |>.offset let res := .rejected (s.pos! startPos) (s.pos! nextStackPos) diff --git a/src/Init/Data/String/Slice.lean b/src/Init/Data/String/Slice.lean index f497c8154a..8866aaacc9 100644 --- a/src/Init/Data/String/Slice.lean +++ b/src/Init/Data/String/Slice.lean @@ -688,8 +688,8 @@ def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool := where go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool := if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then - let c1 := (s1.getUtf8Byte s1Curr h.left).toAsciiLower - let c2 := (s2.getUtf8Byte s2Curr h.right).toAsciiLower + let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower + let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower if c1 == c2 then go s1 s1Curr.inc s2 s2Curr.inc else @@ -900,12 +900,12 @@ instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where ∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize, it.internalState.s = it'.internalState.s ∧ it'.internalState.offset = it.internalState.offset.inc ∧ - it.internalState.s.getUtf8Byte it.internalState.offset h1 = out + it.internalState.s.getUTF8Byte it.internalState.offset h1 = out | .skip _ => False | .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize step := fun ⟨s, offset⟩ => if h : offset < s.utf8ByteSize then - pure ⟨.yield ⟨s, offset.inc⟩ (s.getUtf8Byte offset h), by simp [h]⟩ + pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset h), by simp [h]⟩ else pure ⟨.done, by simp [h]⟩ @@ -981,7 +981,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where it.internalState.s = it'.internalState.s ∧ it.internalState.offset ≠ 0 ∧ it'.internalState.offset = it.internalState.offset.dec ∧ - it.internalState.s.getUtf8Byte it.internalState.offset.dec h1 = out + it.internalState.s.getUTF8Byte it.internalState.offset.dec h1 = out | .skip _ => False | .done => it.internalState.offset = 0 step := fun ⟨s, offset, hinv⟩ => @@ -994,7 +994,7 @@ instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where simp [String.Pos.Raw.le_iff, nextOffset] at hinv ⊢ omega have hiter := by simp [nextOffset, hbound, h] - pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUtf8Byte nextOffset hbound), hiter⟩ + pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUTF8Byte nextOffset hbound), hiter⟩ else pure ⟨.done, by simpa using h⟩ diff --git a/src/Init/Data/ToString/Name.lean b/src/Init/Data/ToString/Name.lean index d6952fe182..5fa3a0a34e 100644 --- a/src/Init/Data/ToString/Name.lean +++ b/src/Init/Data/ToString/Name.lean @@ -26,14 +26,14 @@ namespace Lean.Name -- If you change this, also change the corresponding function in `Init.Meta`. private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool := if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1) else true -- If you change this, also change the corresponding function in `Init.Meta`. @[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool := - let c := s.getUtf8Byte 0 h + let c := s.getUTF8Byte 0 h isIdFirstAscii c && needsNoEscapeAsciiRest s 1 -- If you change this, also change the corresponding function in `Init.Meta`. diff --git a/src/Init/Meta/Defs.lean b/src/Init/Meta/Defs.lean index 46f6e9e046..59470a5246 100644 --- a/src/Init/Meta/Defs.lean +++ b/src/Init/Meta/Defs.lean @@ -156,12 +156,6 @@ def isInaccessibleUserName : Name → Bool | Name.num p _ => isInaccessibleUserName p | _ => false --- FIXME: `getUtf8Byte` is in `Init.Data.String.Extra`, which causes an import cycle with --- `Init.Meta`. Moving `getUtf8Byte` up to `Init.Data.String.Basic` creates another import cycle. --- Please replace this definition with `getUtf8Byte` when the string refactor is through. -@[extern "lean_string_get_byte_fast"] -private opaque getUtf8Byte' (s : @& String) (n : Nat) (h : n < s.utf8ByteSize) : UInt8 - section ToString /-! @@ -177,14 +171,14 @@ inner-loop function like `Name.toString`. -- If you change this, also change the corresponding function in `Init.Data.ToString.Name`. private partial def needsNoEscapeAsciiRest (s : String) (i : Nat) : Bool := if h : i < s.utf8ByteSize then - let c := getUtf8Byte' s i h + let c := String.Internal.getUTF8Byte s i h isIdRestAscii c && needsNoEscapeAsciiRest s (i + 1) else true -- If you change this, also change the corresponding function in `Init.Data.ToString.Name`. @[inline] private def needsNoEscapeAscii (s : String) (h : s.utf8ByteSize > 0) : Bool := - let c := getUtf8Byte' s 0 h + let c := String.Internal.getUTF8Byte s 0 h isIdFirstAscii c && needsNoEscapeAsciiRest s 1 -- If you change this, also change the corresponding function in `Init.Data.ToString.Name`. diff --git a/src/Init/Prelude.lean b/src/Init/Prelude.lean index 94b1d404ee..559df950e6 100644 --- a/src/Init/Prelude.lean +++ b/src/Init/Prelude.lean @@ -3378,7 +3378,7 @@ def List.utf8Encode (l : List Char) : ByteArray := Note that in order for this definition to be well-behaved it is necessary to know that this `m` is unique. To show this, one defines UTF-8 decoding and shows that encoding and decoding are mutually inverse. -/ -inductive ByteArray.IsValidUtf8 (b : ByteArray) : Prop +inductive ByteArray.IsValidUTF8 (b : ByteArray) : Prop /-- Show that a byte -/ | intro (m : List Char) (hm : Eq b (List.utf8Encode m)) @@ -3393,10 +3393,10 @@ modifications when the reference to the string is unique. structure String where ofByteArray :: /-- The bytes of the UTF-8 encoding of the string. Since strings have a special representation in the runtime, this function actually takes linear time and space at runtime. For efficient access - to the string's bytes, use `String.utf8ByteSize` and `String.getUtf8Byte`. -/ + to the string's bytes, use `String.utf8ByteSize` and `String.getUTF8Byte`. -/ bytes : ByteArray /-- The bytes of the string form valid UTF-8. -/ - isValidUtf8 : ByteArray.IsValidUtf8 bytes + isValidUTF8 : ByteArray.IsValidUTF8 bytes attribute [extern "lean_string_to_utf8"] String.bytes attribute [extern "lean_string_from_utf8_unchecked"] String.ofByteArray diff --git a/src/Lean/Data/Json/Printer.lean b/src/Lean/Data/Json/Printer.lean index 787e8713a2..edac9b6f80 100644 --- a/src/Lean/Data/Json/Printer.lean +++ b/src/Lean/Data/Json/Printer.lean @@ -64,7 +64,7 @@ private def needEscape (s : String) : Bool := where go (s : String) (i : Nat) : Bool := if h : i < s.utf8ByteSize then - let byte := s.getUtf8Byte ⟨i⟩ h + let byte := s.getUTF8Byte ⟨i⟩ h have h1 : byte.toNat < 256 := UInt8.toNat_lt_size byte have h2 : escapeTable.val.size = 256 := escapeTable.property if escapeTable.val.get byte.toNat (Nat.lt_of_lt_of_eq h1 h2.symm) == 0 then diff --git a/src/Lean/Data/Trie.lean b/src/Lean/Data/Trie.lean index 701622fae8..9dce9c1554 100644 --- a/src/Lean/Data/Trie.lean +++ b/src/Lean/Data/Trie.lean @@ -63,7 +63,7 @@ instance : Inhabited (Trie α) where partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α := let rec insertEmpty (i : Nat) : Trie α := if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h let t := insertEmpty (i + 1) node1 none c t else @@ -71,14 +71,14 @@ partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α : let rec loop | i, leaf v => if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h let t := insertEmpty (i + 1) node1 v c t else leaf (f v) | i, node1 v c' t' => if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h if c == c' then node1 v c' (loop (i + 1) t') else @@ -88,7 +88,7 @@ partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α : node1 (f v) c' t' | i, node v cs ts => if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h match cs.findIdx? (· == c) with | none => let t := insertEmpty (i + 1) @@ -113,7 +113,7 @@ partial def find? (t : Trie α) (s : String) : Option α := val | i, node1 val c' t' => if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h if c == c' then loop (i + 1) t' else none @@ -121,7 +121,7 @@ partial def find? (t : Trie α) (s : String) : Option α := val | i, node val cs ts => if h : i < s.utf8ByteSize then - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h match cs.findIdx? (· == c) with | none => none | some idx => loop (i + 1) ts[idx]! @@ -150,7 +150,7 @@ partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0 where go (t : Trie α) (i : Nat) : Array α := if h : i < pre.utf8ByteSize then - let c := pre.getUtf8Byte ⟨i⟩ h + let c := pre.getUTF8Byte ⟨i⟩ h match t with | leaf _val => .empty | node1 _val c' t' => @@ -175,7 +175,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw) | node1 v c' t', i, res => let res := if v.isSome then v else res if h : i < endByte then - let c := s.getUtf8Byte ⟨i⟩ (by simp; omega) + let c := s.getUTF8Byte ⟨i⟩ (by simp; omega) if c == c' then loop t' (i + 1) res else res @@ -184,7 +184,7 @@ partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos.Raw) | node v cs ts, i, res => let res := if v.isSome then v else res if h : i < endByte then - let c := s.getUtf8Byte ⟨i⟩ (by simp; omega) + let c := s.getUTF8Byte ⟨i⟩ (by simp; omega) match cs.findIdx? (· == c) with | none => res | some idx => loop ts[idx]! (i + 1) res diff --git a/src/lake/Lake/Build/Trace.lean b/src/lake/Lake/Build/Trace.lean index ca65a9ae59..7286b60459 100644 --- a/src/lake/Lake/Build/Trace.lean +++ b/src/lake/Lake/Build/Trace.lean @@ -124,7 +124,7 @@ public def ofJsonNumber? (n : JsonNumber) : Except String Hash := /-- Parse a hash from a string of hexadecimal digits. Does no validation. -/ public def ofHex (s : String) : Hash := mk <| s.utf8ByteSize.fold (init := 0) fun i h n => - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h if c ≤ 57 then n*16 + (c - 48).toUInt64 else if 97 ≤ c then n*16 + (c - 87).toUInt64 -- c - 'a' + 10 = (c - 87) else n*16 + (c - 55).toUInt64 -- c - 'A' + 10 = (c - 55) diff --git a/src/lake/Lake/Util/String.lean b/src/lake/Lake/Util/String.lean index 39e7c5fbfc..85922e79e5 100644 --- a/src/lake/Lake/Util/String.lean +++ b/src/lake/Lake/Util/String.lean @@ -24,7 +24,7 @@ public def zpad (n : Nat) (len : Nat) : String := /-- Returns whether a string is composed of only hexadecimal digits. -/ public def isHex (s : String) : Bool := s.utf8ByteSize.all fun i h => - let c := s.getUtf8Byte ⟨i⟩ h + let c := s.getUTF8Byte ⟨i⟩ h if c ≤ 57 then -- '9' 48 ≤ c -- '0' else if c ≤ 102 then -- 'f' diff --git a/tests/lean/run/utf8英語.lean b/tests/lean/run/utf8英語.lean index be5ade6179..d3b95481d9 100644 --- a/tests/lean/run/utf8英語.lean +++ b/tests/lean/run/utf8英語.lean @@ -12,7 +12,7 @@ macro "test_extern'" t:term " => " v:term : command => def checkGet (s : String) (arr : Array UInt8) := (List.range s.utf8ByteSize).all fun i => - let c := if h : _ then s.getUtf8Byte ⟨i⟩ h else unreachable! + let c := if h : _ then s.getUTF8Byte ⟨i⟩ h else unreachable! c == arr[i]! macro "validate" arr:term " => " "↯" : command => diff --git a/tests/lean/setLit.lean.expected.out b/tests/lean/setLit.lean.expected.out index 64485f5db6..8e7e49634e 100644 --- a/tests/lean/setLit.lean.expected.out +++ b/tests/lean/setLit.lean.expected.out @@ -4,11 +4,11 @@ setLit.lean:22:19-22:21: error: overloaded, errors Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. - Fields missing: `bytes`, `isValidUtf8` + Fields missing: `bytes`, `isValidUTF8` Hint: Add missing fields: ̲b̲y̲t̲e̲s̲ ̲:̲=̲ ̲_̲ - ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲t̲f̲8̲ ̲:̲=̲ ̲_̲ ̲ + ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲ ̲i̲s̲V̲a̲l̲i̲d̲U̲T̲F̲8̲ ̲:̲=̲ ̲_̲ ̲ setLit.lean:24:31-24:38: error: overloaded, errors failed to synthesize Singleton Nat String