From f3ab908888cc79f86b4e7ded68745240fe963137 Mon Sep 17 00:00:00 2001 From: Wojciech Nawrocki Date: Fri, 15 Jan 2021 12:46:18 -0500 Subject: [PATCH] fix: substring APIs --- src/Init/Data/String/Basic.lean | 40 +++++++++++++----------- tests/lean/string_imp2.lean | 5 +++ tests/lean/string_imp2.lean.expected.out | 3 ++ 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 2d8757bba5..0e30faa8b5 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -9,12 +9,6 @@ import Init.Data.Char.Basic import Init.Data.Option.Basic universes u -/-- A byte position in a `String`. Internally, `String`s are UTF-8 encoded. -Codepoint positions (counting the Unicode codepoints rather than bytes) -are represented by plain `Nat`s instead. -Indexing a `String` by a byte position is constant-time, while codepoint -positions need to be translated internally to byte positions in linear-time. -/ - def List.asString (s : List Char) : String := ⟨s⟩ @@ -332,44 +326,52 @@ namespace Substring @[inline] def toIterator : Substring → String.Iterator | ⟨s, b, _⟩ => ⟨s, b⟩ +/-- Returns the codepoint at the given offset into the substring. -/ @[inline] def get : Substring → String.Pos → Char | ⟨s, b, _⟩, p => s.get (b+p) -@[inline] def next : Substring → String.Pos → String.Pos +/-- Given an offset of a codepoint into the substring, +returns the offset there of the next codepoint. -/ +@[inline] private def next : Substring → String.Pos → String.Pos | ⟨s, b, e⟩, p => - let p := s.next (b+p) - if p > e then e - b else p - b + let absP := b+p + if absP = e then p else s.next absP - b -@[inline] def prev : Substring → String.Pos → String.Pos +/-- Given an offset of a codepoint into the substring, +returns the offset there of the previous codepoint. -/ +@[inline] private def prev : Substring → String.Pos → String.Pos | ⟨s, b, _⟩, p => - if p = b then p else s.prev (b+p) - b + let absP := b+p + if absP = b then p else s.prev absP - b -def nextn : Substring → Nat → String.Pos → String.Pos +private def nextn : Substring → Nat → String.Pos → String.Pos | ss, 0, p => p | ss, i+1, p => ss.nextn i (ss.next p) -def prevn : Substring → String.Pos → Nat → String.Pos +private def prevn : Substring → String.Pos → Nat → String.Pos | ss, 0, p => p | ss, i+1, p => ss.prevn i (ss.prev p) @[inline] def front (s : Substring) : Char := s.get 0 +/-- Returns the offset into `s` of the first occurence of `c` in `s`, +or `s.bsize` if `c` doesn't occur. -/ @[inline] def posOf (s : Substring) (c : Char) : String.Pos := match s with | ⟨s, b, e⟩ => (String.posOfAux s c e b) - b @[inline] def drop : Substring → Nat → Substring - | ss@⟨s, b, e⟩, n => ⟨s, ss.nextn n b, e⟩ + | ss@⟨s, b, e⟩, n => ⟨s, b + ss.nextn n 0, e⟩ @[inline] def dropRight : Substring → Nat → Substring - | ss@⟨s, b, e⟩, n => ⟨s, b, ss.prevn n e⟩ + | ss@⟨s, b, e⟩, n => ⟨s, b, b + ss.prevn n ss.bsize⟩ @[inline] def take : Substring → Nat → Substring - | ss@⟨s, b, e⟩, n => ⟨s, b, ss.nextn n b⟩ + | ss@⟨s, b, e⟩, n => ⟨s, b, b + ss.nextn n 0⟩ @[inline] def takeRight : Substring → Nat → Substring - | ss@⟨s, b, e⟩, n => ⟨s, ss.prevn n e, e⟩ + | ss@⟨s, b, e⟩, n => ⟨s, b + ss.prevn n ss.bsize, e⟩ @[inline] def atEnd : Substring → String.Pos → Bool | ⟨s, b, e⟩, p => b + p == e @@ -419,7 +421,7 @@ partial def splitOn (s : Substring) (sep : String := " ") : List Substring := def contains (s : Substring) (c : Char) : Bool := s.any (fun a => a == c) -@[specialize] partial def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos := +@[specialize] private partial def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos := if i == stopPos then i else if p (s.get i) then takeWhileAux s stopPos p (s.next i) else i @@ -434,7 +436,7 @@ def contains (s : Substring) (c : Char) : Bool := let b := takeWhileAux s e p b; ⟨s, b, e⟩ -@[specialize] partial def takeRightWhileAux (s : String) (begPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos := +@[specialize] private partial def takeRightWhileAux (s : String) (begPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos := if i == begPos then i else let i' := s.prev i diff --git a/tests/lean/string_imp2.lean b/tests/lean/string_imp2.lean index 28c75e262d..de2049cb4b 100644 --- a/tests/lean/string_imp2.lean +++ b/tests/lean/string_imp2.lean @@ -49,3 +49,8 @@ it₁.remainingToString ++ "-" ++ it₂.remainingToString #eval "abc" == "abd" #eval "αβγ".drop 1 #eval "αβγ".takeRight 1 + +def ss : Substring := "0123abcdαβγδ".toSubstring +#eval ss.drop 4 |>.takeRight 4 +#eval ss.drop 4 |>.take 4 +#eval ss.dropRight 4 |>.takeRight 4 \ No newline at end of file diff --git a/tests/lean/string_imp2.lean.expected.out b/tests/lean/string_imp2.lean.expected.out index c2c60493ed..8e18242b9e 100644 --- a/tests/lean/string_imp2.lean.expected.out +++ b/tests/lean/string_imp2.lean.expected.out @@ -37,3 +37,6 @@ true false "βγ" "γ" +"αβγδ".toSubstring +"abcd".toSubstring +"abcd".toSubstring