fix: substring APIs

This commit is contained in:
Wojciech Nawrocki 2021-01-15 12:46:18 -05:00 committed by Leonardo de Moura
parent fa7e679c73
commit f3ab908888
3 changed files with 29 additions and 19 deletions

View file

@ -9,12 +9,6 @@ import Init.Data.Char.Basic
import Init.Data.Option.Basic
universes u
/-- A byte position in a `String`. Internally, `String`s are UTF-8 encoded.
Codepoint positions (counting the Unicode codepoints rather than bytes)
are represented by plain `Nat`s instead.
Indexing a `String` by a byte position is constant-time, while codepoint
positions need to be translated internally to byte positions in linear-time. -/
def List.asString (s : List Char) : String :=
⟨s⟩
@ -332,44 +326,52 @@ namespace Substring
@[inline] def toIterator : Substring → String.Iterator
| ⟨s, b, _⟩ => ⟨s, b⟩
/-- Returns the codepoint at the given offset into the substring. -/
@[inline] def get : Substring → String.Pos → Char
| ⟨s, b, _⟩, p => s.get (b+p)
@[inline] def next : Substring → String.Pos → String.Pos
/-- Given an offset of a codepoint into the substring,
returns the offset there of the next codepoint. -/
@[inline] private def next : Substring → String.Pos → String.Pos
| ⟨s, b, e⟩, p =>
let p := s.next (b+p)
if p > e then e - b else p - b
let absP := b+p
if absP = e then p else s.next absP - b
@[inline] def prev : Substring → String.Pos → String.Pos
/-- Given an offset of a codepoint into the substring,
returns the offset there of the previous codepoint. -/
@[inline] private def prev : Substring → String.Pos → String.Pos
| ⟨s, b, _⟩, p =>
if p = b then p else s.prev (b+p) - b
let absP := b+p
if absP = b then p else s.prev absP - b
def nextn : Substring → Nat → String.Pos → String.Pos
private def nextn : Substring → Nat → String.Pos → String.Pos
| ss, 0, p => p
| ss, i+1, p => ss.nextn i (ss.next p)
def prevn : Substring → String.Pos → Nat → String.Pos
private def prevn : Substring → String.Pos → Nat → String.Pos
| ss, 0, p => p
| ss, i+1, p => ss.prevn i (ss.prev p)
@[inline] def front (s : Substring) : Char :=
s.get 0
/-- Returns the offset into `s` of the first occurence of `c` in `s`,
or `s.bsize` if `c` doesn't occur. -/
@[inline] def posOf (s : Substring) (c : Char) : String.Pos :=
match s with
| ⟨s, b, e⟩ => (String.posOfAux s c e b) - b
@[inline] def drop : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, ss.nextn n b, e⟩
| ss@⟨s, b, e⟩, n => ⟨s, b + ss.nextn n 0, e⟩
@[inline] def dropRight : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, b, ss.prevn n e⟩
| ss@⟨s, b, e⟩, n => ⟨s, b, b + ss.prevn n ss.bsize⟩
@[inline] def take : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, b, ss.nextn n b
| ss@⟨s, b, e⟩, n => ⟨s, b, b + ss.nextn n 0
@[inline] def takeRight : Substring → Nat → Substring
| ss@⟨s, b, e⟩, n => ⟨s, ss.prevn n e, e⟩
| ss@⟨s, b, e⟩, n => ⟨s, b + ss.prevn n ss.bsize, e⟩
@[inline] def atEnd : Substring → String.Pos → Bool
| ⟨s, b, e⟩, p => b + p == e
@ -419,7 +421,7 @@ partial def splitOn (s : Substring) (sep : String := " ") : List Substring :=
def contains (s : Substring) (c : Char) : Bool :=
s.any (fun a => a == c)
@[specialize] partial def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos :=
@[specialize] private partial def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos :=
if i == stopPos then i
else if p (s.get i) then takeWhileAux s stopPos p (s.next i)
else i
@ -434,7 +436,7 @@ def contains (s : Substring) (c : Char) : Bool :=
let b := takeWhileAux s e p b;
⟨s, b, e⟩
@[specialize] partial def takeRightWhileAux (s : String) (begPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos :=
@[specialize] private partial def takeRightWhileAux (s : String) (begPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos :=
if i == begPos then i
else
let i' := s.prev i

View file

@ -49,3 +49,8 @@ it₁.remainingToString ++ "-" ++ it₂.remainingToString
#eval "abc" == "abd"
#eval "αβγ".drop 1
#eval "αβγ".takeRight 1
def ss : Substring := "0123abcdαβγδ".toSubstring
#eval ss.drop 4 |>.takeRight 4
#eval ss.drop 4 |>.take 4
#eval ss.dropRight 4 |>.takeRight 4

View file

@ -37,3 +37,6 @@ true
false
"βγ"
"γ"
"αβγδ".toSubstring
"abcd".toSubstring
"abcd".toSubstring