diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 1dbe48e753..1ea50f2e68 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -24,23 +24,51 @@ instance : LT String := instance decLt (s₁ s₂ : @& String) : Decidable (s₁ < s₂) := List.hasDecidableLt s₁.data s₂.data +/-- +Returns the length of a string in Unicode code points. + +Examples: +* `"".length = 0` +* `"abc".length = 3` +* `"L∃∀N".length = 4` +-/ @[extern "lean_string_length"] def length : (@& String) → Nat | ⟨s⟩ => s.length -/-- The internal implementation uses dynamic arrays and will perform destructive updates - if the String is not shared. -/ +/-- +Pushes a character onto the end of a string. + +The internal implementation uses dynamic arrays and will perform destructive updates +if the string is not shared. + +Example: `"abc".push 'd' = "abcd"` +-/ @[extern "lean_string_push"] def push : String → Char → String | ⟨s⟩, c => ⟨s ++ [c]⟩ -/-- The internal implementation uses dynamic arrays and will perform destructive updates - if the String is not shared. -/ +/-- +Appends two strings. + +The internal implementation uses dynamic arrays and will perform destructive updates +if the string is not shared. + +Example: `"abc".append "def" = "abcdef"` +-/ @[extern "lean_string_append"] def append : String → (@& String) → String | ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩ -/-- O(n) in the runtime, where n is the length of the String -/ +/-- +Converts a string to a list of characters. + +Even though the logical model of strings is as a structure that wraps a list of characters, +this operation takes time and space linear in the length of the string, because the compiler +uses an optimized representation as dynamic arrays. + +Example: `"abc".toList = ['a', 'b', 'c']` +-/ def toList (s : String) : List Char := s.data @@ -59,9 +87,17 @@ def utf8GetAux : List Char → Pos → Pos → Char | c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p /-- - Return character at position `p`. If `p` is not a valid position - returns `(default : Char)`. - See `utf8GetAux` for the reference implementation. +Returns the character at position `p` of a string. If `p` is not a valid position, +returns `(default : Char)`. + +See `utf8GetAux` for the reference implementation. + +Examples: +* `"abc".get ⟨1⟩ = 'b'` +* `"abc".get ⟨3⟩ = (default : Char) = 'A'` + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 +character. For example,`"L∃∀N".get ⟨2⟩ = (default : Char) = 'A'`. -/ @[extern "lean_string_utf8_get"] def get (s : @& String) (p : @& Pos) : Char := @@ -72,12 +108,30 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char | [], _, _ => none | c::cs, i, p => if i = p then c else utf8GetAux? cs (i + c) p +/-- +Returns the character at position `p`. If `p` is not a valid position, returns `none`. + +Examples: +* `"abc".get? ⟨1⟩ = some 'b'` +* `"abc".get? ⟨3⟩ = none` + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 +character. For example, `"L∃∀N".get? ⟨2⟩ = none` +-/ @[extern "lean_string_utf8_get_opt"] def get? : (@& String) → (@& Pos) → Option Char | ⟨s⟩, p => utf8GetAux? s 0 p /-- - Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`. +Returns the character at position `p` of a string. If `p` is not a valid position, +returns `(default : Char)` and produces a panic error message. + +Examples: +* `"abc".get! ⟨1⟩ = 'b'` +* `"abc".get! ⟨3⟩` panics + +Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example, +`"L∃∀N".get! ⟨2⟩` panics. -/ @[extern "lean_string_utf8_get_bang"] def get! (s : @& String) (p : @& Pos) : Char := @@ -89,13 +143,48 @@ def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char | c::cs, i, p => if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p) +/-- +Replaces the character at a specified position in a string with a new character. If the position +is invalid, the string is returned unchanged. + +If both the replacement character and the replaced character are ASCII characters and the string +is not shared, destructive updates are used. + +Examples: +* `"abc".set ⟨1⟩ 'B' = "aBc"` +* `"abc".set ⟨3⟩ 'D' = "abc"` +* `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"` + +Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position, +so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`. +-/ @[extern "lean_string_utf8_set"] def set : String → (@& Pos) → Char → String | ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩ +/-- +Replaces the character at position `p` in the string `s` with the result of applying `f` to that character. +If `p` is an invalid position, the string is returned unchanged. + +Examples: +* `abc.modify ⟨1⟩ Char.toUpper = "aBc"` +* `abc.modify ⟨3⟩ Char.toUpper = "abc"` +-/ def modify (s : String) (i : Pos) (f : Char → Char) : String := s.set i <| f <| s.get i +/-- +Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`, +the result is unspecified. + +Examples: +* `"abc".next ⟨1⟩ = String.Pos.mk 2` +* `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4`, since `'∃'` is a multi-byte UTF-8 character + +Cases where the result is unspecified: +* `"abc".next ⟨3⟩`, since `3 = s.endPos` +* `"L∃∀N".next ⟨2⟩`, since `2` points into the middle of a multi-byte UTF-8 character +-/ @[extern "lean_string_utf8_next"] def next (s : @& String) (p : @& Pos) : Pos := let c := get s p