From 644c1d4e36b4d9d4bd2dd7ebe4a02294f25f1fe2 Mon Sep 17 00:00:00 2001 From: Austin Letson Date: Wed, 5 Jun 2024 01:16:56 -0400 Subject: [PATCH] doc: add docstrings and examples for `String` functions (#4332) Add docstrings, usage examples, and doctests for `String.get'`, `String.next'`, `String.posOf`, `String.revPosOf`. --- src/Init/Data/String/Basic.lean | 55 +++++++++++++++++++++++++++++++-- tests/lean/run/string.lean | 27 ++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 7a3a8101da..d8b3f17e70 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -256,7 +256,26 @@ def atEnd : (@& String) → (@& Pos) → Bool | s, p => p.byteIdx ≥ utf8ByteSize s /-- -Similar to `get` but runtime does not perform bounds check. +Returns the character at position `p` of a string. +If `p` is not a valid position, returns `(default : Char)`. + +Requires evidence, `h`, that `p` is within bounds +instead of performing a runtime bounds check as in `get`. + +Examples: +* `"abc".get' 0 (by decide) = 'a'` +* `let lean := "L∃∀N"; lean.get' (0 |> lean.next |> lean.next) (by decide) = '∀'` + +A typical pattern combines `get'` with a dependent if-else expression +to avoid the overhead of an additional bounds check. For example: +``` +def getInBounds? (s : String) (p : String.Pos) : Option Char := + if h : s.atEnd p then none else some (s.get' p h) +``` + +Even with evidence of `¬ s.atEnd p`, +`p` may be invalid if a byte index points into the middle of a multi-byte UTF-8 character. +For example, `"L∃∀N".get' ⟨2⟩ (by decide) = (default : Char)`. -/ @[extern "lean_string_utf8_get_fast"] def get' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Char := @@ -264,7 +283,21 @@ def get' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Char := | ⟨s⟩ => utf8GetAux s 0 p /-- -Similar to `next` but runtime does not perform bounds check. +Returns the next position in a string after position `p`. +If `p` is not a valid position, the result is unspecified. + +Requires evidence, `h`, that `p` is within bounds +instead of performing a runtime bounds check as in `next`. + +Examples: +* `let abc := "abc"; abc.get (abc.next' 0 (by decide)) = 'b'` + +A typical pattern combines `next'` with a dependent if-else expression +to avoid the overhead of an additional bounds check. For example: +``` +def next? (s: String) (p : String.Pos) : Option Char := + if h : s.atEnd p then none else s.get (s.next' p h) +``` -/ @[extern "lean_string_utf8_next_fast"] def next' (s : @& String) (p : @& Pos) (h : ¬ s.atEnd p) : Pos := @@ -305,6 +338,15 @@ def posOfAux (s : String) (c : Char) (stopPos : Pos) (pos : Pos) : Pos := else pos termination_by stopPos.1 - pos.1 +/-- +Returns the position of the first occurrence of a character, `c`, in `s`. +If `s` does not contain `c`, returns `s.endPos`. + +Examples: +* `"abba".posOf 'a' = ⟨0⟩` +* `"abba".posOf 'z' = ⟨4⟩` +* `"L∃∀N".posOf '∀' = ⟨4⟩` +-/ @[inline] def posOf (s : String) (c : Char) : Pos := posOfAux s c s.endPos 0 @@ -317,6 +359,15 @@ def revPosOfAux (s : String) (c : Char) (pos : Pos) : Option Pos := else revPosOfAux s c pos termination_by pos.1 +/-- +Returns the position of the last occurrence of a character, `c`, in `s`. +If `s` does not contain `c`, returns `none`. + +Examples: +* `"abba".posOf 'a' = some ⟨3⟩` +* `"abba".posOf 'z' = none` +* `"L∃∀N".posOf '∀' = some ⟨4⟩` +-/ def revPosOf (s : String) (c : Char) : Option Pos := revPosOfAux s c s.endPos diff --git a/tests/lean/run/string.lean b/tests/lean/run/string.lean index 4a37ffba66..f6098d3361 100644 --- a/tests/lean/run/string.lean +++ b/tests/lean/run/string.lean @@ -15,3 +15,30 @@ def lean : String := "L∃∀N" #guard (0 |> abc.next |> abc.next |> abc.atEnd) = false #guard (0 |> abc.next |> abc.next |> abc.next |> abc.next |> abc.atEnd) = true #guard (0 |> lean.next |> lean.next |> lean.next |> lean.next |> lean.atEnd) = true + +-- get' +#guard "abc".get' 0 (by decide) = 'a' +#guard let lean := "L∃∀N"; lean.get' (0 |> lean.next |> lean.next) (by decide) = '∀' +def getInBounds? (s : String) (p: String.Pos) : Option Char := + if h : s.atEnd p then none else some (s.get' p h) +#guard getInBounds? abc ⟨1⟩ = some 'b' +#guard getInBounds? abc ⟨5⟩ = none +#guard getInBounds? lean ⟨4⟩ = some '∀' +#guard "L∃∀N".get' ⟨2⟩ (by decide) = (default : Char) + +-- next' +#guard let abc := "abc"; abc.get (abc.next' 0 (by decide)) = 'b' +def next? (s: String) (p: String.Pos) : Option Char := + if h : s.atEnd p then none else s.get (s.next' p h) +#guard next? abc ⟨1⟩ = some 'c' +#guard next? abc ⟨5⟩ = none + +-- posOf +#guard "abba".posOf 'a' = ⟨0⟩ +#guard "abba".posOf 'z' = ⟨4⟩ +#guard "L∃∀N".posOf '∀' = ⟨4⟩ + +-- revPosOf +#guard "abba".revPosOf 'a' = some ⟨3⟩ +#guard "abba".revPosOf 'z' = none +#guard "L∃∀N".revPosOf '∀' = some ⟨4⟩