doc: add docstrings and usage examples in Init.Data.String.Basic (#4001)

Add docstrings and usage examples for `String.length`, `.push`,
`.append`, `.get?`, `.set`, `.modyify`, and `.next`. Update docstrings
and add usage examples for `String.toList`, `.get`, and `.get!`.

---------

Co-authored-by: Joachim Breitner <mail@joachim-breitner.de>
Co-authored-by: David Thrane Christiansen <david@davidchristiansen.dk>
This commit is contained in:
Austin Letson 2024-05-07 19:49:43 -04:00 committed by GitHub
parent 2a5ca00ad6
commit b8e67d87a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -24,23 +24,51 @@ instance : LT String :=
instance decLt (s₁ s₂ : @& String) : Decidable (s₁ < s₂) :=
List.hasDecidableLt s₁.data s₂.data
/--
Returns the length of a string in Unicode code points.
Examples:
* `"".length = 0`
* `"abc".length = 3`
* `"L∃∀N".length = 4`
-/
@[extern "lean_string_length"]
def length : (@& String) → Nat
| ⟨s⟩ => s.length
/-- The internal implementation uses dynamic arrays and will perform destructive updates
if the String is not shared. -/
/--
Pushes a character onto the end of a string.
The internal implementation uses dynamic arrays and will perform destructive updates
if the string is not shared.
Example: `"abc".push 'd' = "abcd"`
-/
@[extern "lean_string_push"]
def push : String → Char → String
| ⟨s⟩, c => ⟨s ++ [c]⟩
/-- The internal implementation uses dynamic arrays and will perform destructive updates
if the String is not shared. -/
/--
Appends two strings.
The internal implementation uses dynamic arrays and will perform destructive updates
if the string is not shared.
Example: `"abc".append "def" = "abcdef"`
-/
@[extern "lean_string_append"]
def append : String → (@& String) → String
| ⟨a⟩, ⟨b⟩ => ⟨a ++ b⟩
/-- O(n) in the runtime, where n is the length of the String -/
/--
Converts a string to a list of characters.
Even though the logical model of strings is as a structure that wraps a list of characters,
this operation takes time and space linear in the length of the string, because the compiler
uses an optimized representation as dynamic arrays.
Example: `"abc".toList = ['a', 'b', 'c']`
-/
def toList (s : String) : List Char :=
s.data
@ -59,9 +87,17 @@ def utf8GetAux : List Char → Pos → Pos → Char
| c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p
/--
Return character at position `p`. If `p` is not a valid position
returns `(default : Char)`.
See `utf8GetAux` for the reference implementation.
Returns the character at position `p` of a string. If `p` is not a valid position,
returns `(default : Char)`.
See `utf8GetAux` for the reference implementation.
Examples:
* `"abc".get ⟨1⟩ = 'b'`
* `"abc".get ⟨3⟩ = (default : Char) = 'A'`
Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8
character. For example,`"L∃∀N".get ⟨2⟩ = (default : Char) = 'A'`.
-/
@[extern "lean_string_utf8_get"]
def get (s : @& String) (p : @& Pos) : Char :=
@ -72,12 +108,30 @@ def utf8GetAux? : List Char → Pos → Pos → Option Char
| [], _, _ => none
| c::cs, i, p => if i = p then c else utf8GetAux? cs (i + c) p
/--
Returns the character at position `p`. If `p` is not a valid position, returns `none`.
Examples:
* `"abc".get? ⟨1⟩ = some 'b'`
* `"abc".get? ⟨3⟩ = none`
Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8
character. For example, `"L∃∀N".get? ⟨2⟩ = none`
-/
@[extern "lean_string_utf8_get_opt"]
def get? : (@& String) → (@& Pos) → Option Char
| ⟨s⟩, p => utf8GetAux? s 0 p
/--
Similar to `get`, but produces a panic error message if `p` is not a valid `String.Pos`.
Returns the character at position `p` of a string. If `p` is not a valid position,
returns `(default : Char)` and produces a panic error message.
Examples:
* `"abc".get! ⟨1⟩ = 'b'`
* `"abc".get! ⟨3⟩` panics
Positions can also be invalid if a byte index points into the middle of a multi-byte UTF-8 character. For example,
`"L∃∀N".get! ⟨2⟩` panics.
-/
@[extern "lean_string_utf8_get_bang"]
def get! (s : @& String) (p : @& Pos) : Char :=
@ -89,13 +143,48 @@ def utf8SetAux (c' : Char) : List Char → Pos → Pos → List Char
| c::cs, i, p =>
if i = p then (c'::cs) else c::(utf8SetAux c' cs (i + c) p)
/--
Replaces the character at a specified position in a string with a new character. If the position
is invalid, the string is returned unchanged.
If both the replacement character and the replaced character are ASCII characters and the string
is not shared, destructive updates are used.
Examples:
* `"abc".set ⟨1⟩ 'B' = "aBc"`
* `"abc".set ⟨3⟩ 'D' = "abc"`
* `"L∃∀N".set ⟨4⟩ 'X' = "L∃XN"`
Because `'∃'` is a multi-byte character, the byte index `2` in `L∃∀N` is an invalid position,
so `"L∃∀N".set ⟨2⟩ 'X' = "L∃∀N"`.
-/
@[extern "lean_string_utf8_set"]
def set : String → (@& Pos) → Char → String
| ⟨s⟩, i, c => ⟨utf8SetAux c s 0 i⟩
/--
Replaces the character at position `p` in the string `s` with the result of applying `f` to that character.
If `p` is an invalid position, the string is returned unchanged.
Examples:
* `abc.modify ⟨1⟩ Char.toUpper = "aBc"`
* `abc.modify ⟨3⟩ Char.toUpper = "abc"`
-/
def modify (s : String) (i : Pos) (f : Char → Char) : String :=
s.set i <| f <| s.get i
/--
Returns the next position in a string after position `p`. If `p` is not a valid position or `p = s.endPos`,
the result is unspecified.
Examples:
* `"abc".next ⟨1⟩ = String.Pos.mk 2`
* `"L∃∀N".next ⟨1⟩ = String.Pos.mk 4`, since `'∃'` is a multi-byte UTF-8 character
Cases where the result is unspecified:
* `"abc".next ⟨3⟩`, since `3 = s.endPos`
* `"L∃∀N".next ⟨2⟩`, since `2` points into the middle of a multi-byte UTF-8 character
-/
@[extern "lean_string_utf8_next"]
def next (s : @& String) (p : @& Pos) : Pos :=
let c := get s p