This PR renames `String.getUtf8Byte` to `String.getUTF8Byte` in order to adhere to the standard library naming convention.
1201 lines
42 KiB
Text
1201 lines
42 KiB
Text
/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Henrik Böving
-/
module

prelude
public import Init.Data.String.Pattern
public import Init.Data.Iterators.Consumers.Monadic.Collect
public import Init.Data.Ord.Basic
public import Init.Data.Iterators.Combinators.FilterMap

-- Docstrings in this file are written in Verso markup and are elaborated.
set_option doc.verso true
/-!
This module defines the programming API for {name}`String.Slice`. The API mostly consists of
functionality for searching for various kinds of pattern matches in slices to iterate over them,
provide subslices according to matches etc. The key design principles behind this module are:
- Instead of providing one function per kind of pattern the API is generic over various kinds of
  patterns. Thus it only provides e.g. one kind of function for looking for the position of the
  first occurrence of a pattern. Currently the supported patterns are:
  - {name}`Char`
  - {lean}`Char → Bool`
  - {name}`String` and {name}`String.Slice` (partially: doing non trivial searches backwards is not
    supported yet)
- Whenever a slice gets mutated a new slice is returned to allow for repeated chaining of functions
  with minimal allocations. If necessary the slice can ultimately be converted back to
  {name}`String` using {name}`String.Slice.copy`
- Instead of allocating intermediate collections the operations that iterate over slices in various
  ways (characters, positions etc.) return iterators that can be collected into other collections if
  necessary.
- When sensible the API provides functionality for searching both in a forward and backward manner
-/
public section

namespace String.Slice

-- Brings the pattern type classes used throughout this file into scope unqualified.
open Pattern
/--
Checks whether a slice is empty.

Empty slices have {name}`utf8ByteSize` {lean}`0`.

Examples:
 * {lean}`"".toSlice.isEmpty = true`
 * {lean}`" ".toSlice.isEmpty = false`
-/
@[inline]
def isEmpty (s : Slice) : Bool := s.utf8ByteSize == 0
/--
Checks whether {name}`s1` and {name}`s2` represent the same string, even if they are slices of
different base strings or different slices within the same string.

The implementation is an efficient equivalent of {lean}`s1.copy == s2.copy`
-/
def beq (s1 s2 : Slice) : Bool :=
  if h : s1.utf8ByteSize = s2.utf8ByteSize then
    -- Same byte size: compare the underlying bytes directly. `h1` and `h2` are the proof
    -- arguments that `Internal.memcmp` takes; both are discharged from the size equality `h`.
    have h1 := by simp [h, String.Pos.Raw.le_iff]
    have h2 := by simp [h, String.Pos.Raw.le_iff]
    Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.utf8ByteSize h1 h2
  else
    -- Different byte sizes can never be equal.
    false
-- Boolean equality on slices is content equality as implemented by `beq`.
instance : BEq Slice where
  beq := beq
-- Hash of a slice. The declaration is opaque; the `extern` attribute binds it to the
-- symbol `lean_slice_hash`. The argument is borrowed (`@&`).
@[extern "lean_slice_hash"]
opaque hash (s : @& Slice) : UInt64
-- Slices are hashable via `hash`.
instance : Hashable Slice where
  hash := hash
-- Strict order on slices: defined as the `String` order on the copied contents.
instance : LT Slice where
  lt x y := x.copy < y.copy
-- Decision procedure for `x < y`. Logically it is the decision procedure for
-- `x.copy < y.copy`; the `extern` attribute binds it to the symbol `lean_slice_dec_lt`.
@[extern "lean_slice_dec_lt"]
instance (x y : @& Slice) : Decidable (x < y) :=
  inferInstanceAs (Decidable (x.copy < y.copy))
-- Three-way comparison derived from the `<` and `==` instances on slices.
instance : Ord Slice where
  compare x y := compareOfLessAndBEq x y
-- `x ≤ y` holds exactly when `y` is not strictly smaller than `x`.
-- (The previous definition `¬x < y` was inverted: it characterised `y ≤ x`, so for example
-- a slice was never `≤` a strictly larger one.)
instance : LE Slice where
  le x y := ¬y < x

-- `≤` is decidable because `<` is; the proposition must match the unfolding of `LE` above.
instance : DecidableLE Slice :=
  fun x y => inferInstanceAs (Decidable (¬y < x))
section ForwardPatternUsers

-- `ρ` is the pattern type; `σ s` is the searcher state type for a slice `s`.
variable {ρ : Type} {σ : Slice → Type}
-- The searcher for every slice is a finite, loopable iterator over `SearchStep s` in `Id`.
variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)]
variable [∀ s, Std.Iterators.Finite (σ s) Id]
variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id]
/--
Checks whether the slice ({name}`s`) begins with the pattern ({name}`pat`).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.startsWith "red" = true`
 * {lean}`"red green blue".toSlice.startsWith "green" = false`
 * {lean}`"red green blue".toSlice.startsWith "" = true`
 * {lean}`"red green blue".toSlice.startsWith 'r' = true`
 * {lean}`"red green blue".toSlice.startsWith Char.isLower = true`
-/
@[inline]
def startsWith [ForwardPattern ρ] (s : Slice) (pat : ρ) : Bool :=
  ForwardPattern.startsWith s pat
-- State of the iterator returned by `split`.
-- * `operating s currPos searcher`: still splitting `s`; `currPos` is where the next yielded
--   piece starts and `searcher` produces the remaining matches.
-- * `atEnd`: nothing is left to yield.
inductive SplitIterator (ρ : Type) [ToForwardSearcher ρ σ] where
  | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s))
  | atEnd
deriving Inhabited
namespace SplitIterator

variable [ToForwardSearcher ρ σ]

instance [Pure m] : Std.Iterators.Iterator (SplitIterator ρ) m Slice where
  IsPlausibleStep := fun _ _ => True
  step := fun ⟨iter⟩ =>
    match iter with
    | .operating s currPos searcher =>
      match Internal.nextMatch searcher with
      | some (searcher, startPos, endPos) =>
        -- Yield the piece before the match and continue right after the match.
        let slice := s.replaceStartEnd! currPos startPos
        let nextIt := ⟨.operating s endPos searcher⟩
        pure ⟨.yield nextIt slice, by simp⟩
      | none =>
        -- No further match: the rest of the slice is the last piece (it may be empty).
        let slice := s.replaceStart currPos
        pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩
    | .atEnd => pure ⟨.done, by simp⟩

-- TODO: Finiteness after we have a notion of lawful searcher

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (SplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial (SplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (SplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (SplitIterator ρ) m n :=
  .defaultImplementation

end SplitIterator
/--
Splits a slice at each subslice that matches the pattern {name}`pat`.

The subslices that matched the pattern are not included in any of the resulting subslices. If
multiple subslices in a row match the pattern, the resulting list will contain empty slices.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`("coffee tea water".toSlice.split Char.isWhitespace).allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]`
 * {lean}`("coffee tea water".toSlice.split ' ').allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]`
 * {lean}`("coffee tea water".toSlice.split " tea ").allowNontermination.toList == ["coffee".toSlice, "water".toSlice]`
 * {lean}`("ababababa".toSlice.split "aba").allowNontermination.toList == ["".toSlice, "b".toSlice, "ba".toSlice]`
 * {lean}`("baaab".toSlice.split "aa").allowNontermination.toList == ["b".toSlice, "ab".toSlice]`
-/
@[specialize pat]
def split [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Std.Iter (α := SplitIterator ρ) Slice :=
  { internalState := .operating s s.startPos (ToForwardSearcher.toSearcher s pat) }
-- State of the iterator returned by `splitInclusive`.
-- * `operating s currPos searcher`: still splitting `s`; `currPos` is where the next yielded
--   piece starts and `searcher` produces the remaining matches.
-- * `atEnd`: nothing is left to yield.
inductive SplitInclusiveIterator (ρ : Type) [ToForwardSearcher ρ σ] where
  | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s))
  | atEnd
deriving Inhabited
namespace SplitInclusiveIterator

variable [ToForwardSearcher ρ σ]

instance [Pure m] : Std.Iterators.Iterator (SplitInclusiveIterator ρ) m Slice where
  IsPlausibleStep := fun _ _ => True
  step := fun ⟨iter⟩ =>
    match iter with
    | .operating s currPos searcher =>
      match Internal.nextMatch searcher with
      | some (searcher, _, endPos) =>
        -- Yield the piece up to and including the match, then continue right after it.
        let slice := s.replaceStartEnd! currPos endPos
        let nextIt := ⟨.operating s endPos searcher⟩
        pure ⟨.yield nextIt slice, by simp⟩
      | none =>
        -- No further match: yield the remainder only if it is non-empty.
        if currPos != s.endPos then
          let slice := s.replaceStart currPos
          pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩
        else
          pure ⟨.done, by simp⟩
    | .atEnd => pure ⟨.done, by simp⟩

-- TODO: Finiteness after we have a notion of lawful searcher

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorCollect (SplitInclusiveIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorCollectPartial (SplitInclusiveIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorLoop (SplitInclusiveIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorLoopPartial (SplitInclusiveIterator ρ) m n :=
  .defaultImplementation

end SplitInclusiveIterator
/--
Splits a slice at each subslice that matches the pattern {name}`pat`. Unlike {name}`split` the
matched subslices are included at the end of each subslice.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`("coffee tea water".toSlice.splitInclusive Char.isWhitespace).allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]`
 * {lean}`("coffee tea water".toSlice.splitInclusive ' ').allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]`
 * {lean}`("coffee tea water".toSlice.splitInclusive " tea ").allowNontermination.toList == ["coffee tea ".toSlice, "water".toSlice]`
 * {lean}`("baaab".toSlice.splitInclusive "aa").allowNontermination.toList == ["baa".toSlice, "ab".toSlice]`
-/
@[specialize pat]
def splitInclusive [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) :
    Std.Iter (α := SplitInclusiveIterator ρ) Slice :=
  { internalState := .operating s s.startPos (ToForwardSearcher.toSearcher s pat) }
/--
If {name}`pat` matches a prefix of {name}`s`, returns the remainder. Returns {name}`none` otherwise.

Use {name (scope := "Init.Data.String.Slice")}`String.Slice.dropPrefix` to return the slice
unchanged when {name}`pat` does not match a prefix.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropPrefix? "red " == some "green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropPrefix? "reed " == none`
 * {lean}`"red green blue".toSlice.dropPrefix? 'r' == some "ed green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropPrefix? Char.isLower == some "ed green blue".toSlice`
-/
@[inline]
def dropPrefix? [ForwardPattern ρ] (s : Slice) (pat : ρ) : Option Slice :=
  ForwardPattern.dropPrefix? s pat
/--
If {name}`pat` matches a prefix of {name}`s`, returns the remainder. Returns {name}`s` unmodified
otherwise.

Use {name}`String.Slice.dropPrefix?` to return {name}`none` when {name}`pat` does not match a prefix.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropPrefix "red " == "green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropPrefix "reed " == "red green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropPrefix 'r' == "ed green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropPrefix Char.isLower == "ed green blue".toSlice`
-/
@[specialize pat]
def dropPrefix [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  dropPrefix? s pat |>.getD s
/--
Removes the specified number of characters (Unicode code points) from the start of the slice.

If {name}`n` is greater than the amount of characters in {name}`s`, returns an empty slice.

Examples:
 * {lean}`"red green blue".toSlice.drop 4 == "green blue".toSlice`
 * {lean}`"red green blue".toSlice.drop 10 == "blue".toSlice`
 * {lean}`"red green blue".toSlice.drop 50 == "".toSlice`
-/
@[inline]
def drop (s : Slice) (n : Nat) : Slice :=
  s.replaceStart (s.startPos.nextn n)
/--
Creates a new slice by removing the longest prefix of {name}`s` for which {name}`pat` matched
(potentially repeatedly).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropWhile Char.isLower == " green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropWhile 'r' == "ed green blue".toSlice`
 * {lean}`"red red green blue".toSlice.dropWhile "red " == "green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropWhile (fun (_ : Char) => true) == "".toSlice`
-/
@[inline]
partial def dropWhile [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  go s
where
  @[specialize pat]
  go (s : Slice) : Slice :=
    if let some nextS := dropPrefix? s pat then
      -- TODO: need lawful patterns to show this terminates
      -- Only recurse when the match consumed something; a match that does not advance the
      -- start (e.g. an empty pattern) would otherwise loop forever.
      if s.startInclusive.offset < nextS.startInclusive.offset then
        go nextS
      else
        s
    else
      s
/--
Removes leading whitespace from a slice by moving its start position to the first non-whitespace
character, or to its end position if there is no non-whitespace character.

“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`.

Examples:
 * {lean}`"abc".toSlice.trimAsciiStart == "abc".toSlice`
 * {lean}`"   abc".toSlice.trimAsciiStart == "abc".toSlice`
 * {lean}`"abc \t  ".toSlice.trimAsciiStart == "abc \t  ".toSlice`
 * {lean}`"  abc   ".toSlice.trimAsciiStart == "abc   ".toSlice`
 * {lean}`"abc\ndef\n".toSlice.trimAsciiStart == "abc\ndef\n".toSlice`
-/
@[inline]
def trimAsciiStart (s : Slice) : Slice :=
  -- If we want to optimize this can be pushed further by specialising for ASCII
  dropWhile s Char.isWhitespace
/--
Creates a new slice that contains the first {name}`n` characters (Unicode code points) of {name}`s`.

If {name}`n` is greater than the amount of characters in {name}`s`, returns {name}`s`.

Examples:
 * {lean}`"red green blue".toSlice.take 3 == "red".toSlice`
 * {lean}`"red green blue".toSlice.take 1 == "r".toSlice`
 * {lean}`"red green blue".toSlice.take 0 == "".toSlice`
 * {lean}`"red green blue".toSlice.take 100 == "red green blue".toSlice`
-/
@[inline]
def take (s : Slice) (n : Nat) : Slice :=
  s.replaceEnd (s.startPos.nextn n)
/--
Creates a new slice that contains the longest prefix of {name}`s` for which {name}`pat` matched
(potentially repeatedly).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.takeWhile Char.isLower == "red".toSlice`
 * {lean}`"red green blue".toSlice.takeWhile 'r' == "r".toSlice`
 * {lean}`"red red green blue".toSlice.takeWhile "red " == "red red ".toSlice`
 * {lean}`"red green blue".toSlice.takeWhile (fun (_ : Char) => true) == "red green blue".toSlice`
-/
@[inline]
partial def takeWhile [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  go s
where
  -- `curr` is the not-yet-consumed remainder of `s`; the result is `s` cut off where the
  -- remainder starts once the pattern stops matching (or stops making progress).
  -- NOTE(review): `curr.startInclusive.offset` looks like an offset into the underlying
  -- string, while `s.pos!` presumably expects an offset relative to `s` — confirm this is
  -- right for slices that do not start at offset 0 of their base string.
  @[specialize pat]
  go (curr : Slice) : Slice :=
    if let some nextCurr := dropPrefix? curr pat then
      if curr.startInclusive.offset < nextCurr.startInclusive.offset then
        -- TODO: need lawful patterns to show this terminates
        go nextCurr
      else
        s.replaceEnd <| s.pos! <| curr.startInclusive.offset
    else
      s.replaceEnd <| s.pos! <| curr.startInclusive.offset
/--
Finds the position of the first match of the pattern {name}`pat` in a slice {name}`s`. If there
is no match {name}`none` is returned.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`("coffee tea water".toSlice.find? Char.isWhitespace).map (·.get!) == some ' '`
 * {lean}`"tea".toSlice.find? (fun (c : Char) => c == 'X') == none`
 * {lean}`("coffee tea water".toSlice.find? "tea").map (·.get!) == some 't'`
-/
@[specialize pat]
def find? [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Option s.Pos :=
  let searcher := ToForwardSearcher.toSearcher s pat
  -- Only the start position of the first match is of interest.
  match Internal.nextMatch searcher with
  | some (_, startPos, _) => some startPos
  | none => none
/--
Checks whether a slice has a match of the pattern {name}`pat` anywhere.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"coffee tea water".toSlice.contains Char.isWhitespace = true`
 * {lean}`"tea".toSlice.contains (fun (c : Char) => c == 'X') = false`
 * {lean}`"coffee tea water".toSlice.contains "tea" = true`
-/
@[specialize pat]
def contains [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Bool :=
  let searcher := ToForwardSearcher.toSearcher s pat
  Internal.nextMatch searcher |>.isSome
/--
Checks whether a slice only consists of matches of the pattern {name}`pat`.

Short-circuits at the first pattern mis-match.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"brown".toSlice.all Char.isLower = true`
 * {lean}`"brown and orange".toSlice.all Char.isLower = false`
 * {lean}`"aaaaaa".toSlice.all 'a' = true`
 * {lean}`"aaaaaa".toSlice.all "aa" = true`
-/
@[inline]
def all [ForwardPattern ρ] (s : Slice) (pat : ρ) : Bool :=
  -- Everything matched iff nothing is left after dropping all leading matches.
  s.dropWhile pat |>.isEmpty
end ForwardPatternUsers

section BackwardPatternUsers

-- `σ s` is the searcher state type for a slice `s`.
variable {σ : Slice → Type}
-- The searcher for every slice is a finite, loopable iterator over `SearchStep s` in `Id`.
variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)]
variable [∀ s, Std.Iterators.Finite (σ s) Id]
variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id]
/--
Checks whether the slice ({name}`s`) ends with the pattern ({name}`pat`).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.endsWith "blue" = true`
 * {lean}`"red green blue".toSlice.endsWith "green" = false`
 * {lean}`"red green blue".toSlice.endsWith "" = true`
 * {lean}`"red green blue".toSlice.endsWith 'e' = true`
 * {lean}`"red green blue".toSlice.endsWith Char.isLower = true`
-/
@[inline]
def endsWith [BackwardPattern ρ] (s : Slice) (pat : ρ) : Bool :=
  BackwardPattern.endsWith s pat
-- State of the iterator returned by `revSplit`.
-- * `operating s currPos searcher`: still splitting `s`; `currPos` is where the next yielded
--   piece ends and `searcher` produces the remaining matches.
-- * `atEnd`: nothing is left to yield.
inductive RevSplitIterator (ρ : Type) [ToBackwardSearcher ρ σ] where
  | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s))
  | atEnd
deriving Inhabited
namespace RevSplitIterator

variable [ToBackwardSearcher ρ σ]

instance [Pure m] : Std.Iterators.Iterator (RevSplitIterator ρ) m Slice where
  IsPlausibleStep := fun _ _ => True
  step := fun ⟨iter⟩ =>
    match iter with
    | .operating s currPos searcher =>
      match Internal.nextMatch searcher with
      | some (searcher, startPos, endPos) =>
        -- Yield the piece between the end of the match and the current position, then
        -- continue from the start of the match.
        let slice := s.replaceStartEnd! endPos currPos
        let nextIt := ⟨.operating s startPos searcher⟩
        pure ⟨.yield nextIt slice, by simp⟩
      | none =>
        -- No further match: yield the remaining front part only if it is non-empty.
        if currPos ≠ s.startPos then
          let slice := s.replaceEnd currPos
          pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩
        else
          pure ⟨.done, by simp⟩
    | .atEnd => pure ⟨.done, by simp⟩

-- TODO: Finiteness after we have a notion of lawful searcher

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (RevSplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorCollectPartial (RevSplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (RevSplitIterator ρ) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (RevSplitIterator ρ) m n :=
  .defaultImplementation

end RevSplitIterator
/--
Splits a slice at each subslice that matches the pattern {name}`pat`, starting from the end of the
slice and traversing towards the start.

The subslices that matched the pattern are not included in any of the resulting subslices. If
multiple subslices in a row match the pattern, the resulting list will contain empty slices.

This function is generic over all currently supported patterns except
{name}`String`/{name}`String.Slice`.

Examples:
 * {lean}`("coffee tea water".toSlice.revSplit Char.isWhitespace).allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice]`
 * {lean}`("coffee tea water".toSlice.revSplit ' ').allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice]`
-/
@[specialize pat]
def revSplit [ToBackwardSearcher ρ σ] (s : Slice) (pat : ρ) :
    Std.Iter (α := RevSplitIterator ρ) Slice :=
  { internalState := .operating s s.endPos (ToBackwardSearcher.toSearcher s pat) }
/--
If {name}`pat` matches a suffix of {name}`s`, returns the remainder. Returns {name}`none` otherwise.

Use {name (scope := "Init.Data.String.Slice")}`String.Slice.dropSuffix` to return the slice
unchanged when {name}`pat` does not match a suffix.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropSuffix? " blue" == some "red green".toSlice`
 * {lean}`"red green blue".toSlice.dropSuffix? "bluu " == none`
 * {lean}`"red green blue".toSlice.dropSuffix? 'e' == some "red green blu".toSlice`
 * {lean}`"red green blue".toSlice.dropSuffix? Char.isLower == some "red green blu".toSlice`
-/
@[inline]
def dropSuffix? [BackwardPattern ρ] (s : Slice) (pat : ρ) : Option Slice :=
  BackwardPattern.dropSuffix? s pat
/--
If {name}`pat` matches a suffix of {name}`s`, returns the remainder. Returns {name}`s` unmodified
otherwise.

Use {name}`String.Slice.dropSuffix?` to return {name}`none` when {name}`pat` does not match a
suffix.

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropSuffix " blue" == "red green".toSlice`
 * {lean}`"red green blue".toSlice.dropSuffix "bluu " == "red green blue".toSlice`
 * {lean}`"red green blue".toSlice.dropSuffix 'e' == "red green blu".toSlice`
 * {lean}`"red green blue".toSlice.dropSuffix Char.isLower == "red green blu".toSlice`
-/
@[specialize pat]
def dropSuffix [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  dropSuffix? s pat |>.getD s
/--
Removes the specified number of characters (Unicode code points) from the end of the slice.

If {name}`n` is greater than the amount of characters in {name}`s`, returns an empty slice.

Examples:
 * {lean}`"red green blue".toSlice.dropEnd 5 == "red green".toSlice`
 * {lean}`"red green blue".toSlice.dropEnd 11 == "red".toSlice`
 * {lean}`"red green blue".toSlice.dropEnd 50 == "".toSlice`
-/
@[inline]
def dropEnd (s : Slice) (n : Nat) : Slice :=
  s.replaceEnd (s.endPos.prevn n)
/--
Creates a new slice by removing the longest suffix of {name}`s` for which {name}`pat` matched
(potentially repeatedly).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.dropEndWhile Char.isLower == "red green ".toSlice`
 * {lean}`"red green blue".toSlice.dropEndWhile 'e' == "red green blu".toSlice`
 * {lean}`"red green blue".toSlice.dropEndWhile (fun (_ : Char) => true) == "".toSlice`
-/
@[inline]
partial def dropEndWhile [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  go s
where
  @[specialize pat]
  go (s : Slice) : Slice :=
    if let some nextS := dropSuffix? s pat then
      -- TODO: need lawful patterns to show this terminates
      -- Only recurse when the match consumed something; a match that does not move the
      -- end (e.g. an empty pattern) would otherwise loop forever.
      if nextS.endExclusive.offset < s.endExclusive.offset then
        go nextS
      else
        s
    else
      s
/--
Removes trailing whitespace from a slice by moving its end position to just after the last
non-whitespace character, or to its start position if there is no non-whitespace character.

“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`.

Examples:
 * {lean}`"abc".toSlice.trimAsciiEnd == "abc".toSlice`
 * {lean}`"   abc".toSlice.trimAsciiEnd == "   abc".toSlice`
 * {lean}`"abc \t  ".toSlice.trimAsciiEnd == "abc".toSlice`
 * {lean}`"  abc   ".toSlice.trimAsciiEnd == "  abc".toSlice`
 * {lean}`"abc\ndef\n".toSlice.trimAsciiEnd == "abc\ndef".toSlice`
-/
@[inline]
def trimAsciiEnd (s : Slice) : Slice :=
  -- If we want to optimize this can be pushed further by specialising for ASCII
  dropEndWhile s Char.isWhitespace
/--
Creates a new slice that contains the last {name}`n` characters (Unicode code points) of {name}`s`.

If {name}`n` is greater than the amount of characters in {name}`s`, returns {name}`s`.

Examples:
 * {lean}`"red green blue".toSlice.takeEnd 4 == "blue".toSlice`
 * {lean}`"red green blue".toSlice.takeEnd 1 == "e".toSlice`
 * {lean}`"red green blue".toSlice.takeEnd 0 == "".toSlice`
 * {lean}`"red green blue".toSlice.takeEnd 100 == "red green blue".toSlice`
-/
@[inline]
def takeEnd (s : Slice) (n : Nat) : Slice :=
  s.replaceStart (s.endPos.prevn n)
/--
Creates a new slice that contains the longest suffix of {name}`s` for which {name}`pat` matched
(potentially repeatedly).

This function is generic over all currently supported patterns.

Examples:
 * {lean}`"red green blue".toSlice.takeEndWhile Char.isLower == "blue".toSlice`
 * {lean}`"red green blue".toSlice.takeEndWhile 'e' == "e".toSlice`
 * {lean}`"red green blue".toSlice.takeEndWhile (fun (_ : Char) => true) == "red green blue".toSlice`
-/
@[inline]
partial def takeEndWhile [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice :=
  go s
where
  -- `curr` is the not-yet-consumed front part of `s`; the result is `s` starting where that
  -- front part ends once the pattern stops matching (or stops making progress).
  -- NOTE(review): `curr.endExclusive.offset` looks like an offset into the underlying
  -- string, while `s.pos!` presumably expects an offset relative to `s` — confirm this is
  -- right for slices that do not start at offset 0 of their base string.
  @[specialize pat]
  go (curr : Slice) : Slice :=
    if let some nextCurr := dropSuffix? curr pat then
      if nextCurr.endExclusive.offset < curr.endExclusive.offset then
        -- TODO: need lawful patterns to show this terminates
        go nextCurr
      else
        s.replaceStart <| s.pos! <| curr.endExclusive.offset
    else
      s.replaceStart <| s.pos! <| curr.endExclusive.offset
/--
Finds the position of the first match of the pattern {name}`pat` in a slice {name}`s`, starting
from the end of the slice and traversing towards the start. If there is no match {name}`none` is
returned.

This function is generic over all currently supported patterns except
{name}`String`/{name}`String.Slice`.

Examples:
 * {lean}`("coffee tea water".toSlice.revFind? Char.isWhitespace).map (·.get!) == some ' '`
 * {lean}`"tea".toSlice.revFind? (fun (c : Char) => c == 'X') == none`
 * {lean}`("coffee tea water".toSlice.revFind? ' ').map (·.offset.byteIdx) == some 10`
-/
@[specialize pat]
def revFind? [ToBackwardSearcher ρ σ] (s : Slice) (pat : ρ) : Option s.Pos :=
  let searcher := ToBackwardSearcher.toSearcher s pat
  -- Only the start position of the first match reported by the backward searcher is returned.
  match Internal.nextMatch searcher with
  | some (_, startPos, _) => some startPos
  | none => none
end BackwardPatternUsers
/--
Removes leading and trailing whitespace from a slice.

“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`.

Examples:
 * {lean}`"abc".toSlice.trimAscii == "abc".toSlice`
 * {lean}`"   abc".toSlice.trimAscii == "abc".toSlice`
 * {lean}`"abc \t  ".toSlice.trimAscii == "abc".toSlice`
 * {lean}`"  abc   ".toSlice.trimAscii == "abc".toSlice`
 * {lean}`"abc\ndef\n".toSlice.trimAscii == "abc\ndef".toSlice`
-/
def trimAscii (s : Slice) : Slice :=
  s.trimAsciiStart.trimAsciiEnd
/--
Checks whether {lean}`s1 == s2` if ASCII upper/lowercase are ignored.
-/
def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool :=
  -- Slices of different byte size are never equal; otherwise compare byte by byte.
  s1.utf8ByteSize == s2.utf8ByteSize && go s1 s1.startPos.offset s2 s2.startPos.offset
where
  -- Walks both slices in lock step, comparing the ASCII-lowercased bytes at the two cursors.
  go (s1 : Slice) (s1Curr : String.Pos.Raw) (s2 : Slice) (s2Curr : String.Pos.Raw) : Bool :=
    if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then
      let c1 := (s1.getUTF8Byte s1Curr h.left).toAsciiLower
      let c2 := (s2.getUTF8Byte s2Curr h.right).toAsciiLower
      if c1 == c2 then
        go s1 s1Curr.inc s2 s2Curr.inc
      else
        false
    else
      -- At least one cursor ran out of bytes: equal only if both are exactly at the end.
      s1Curr == s1.utf8ByteSize && s2Curr == s2.utf8ByteSize
  termination_by s1.endPos.offset.byteIdx - s1Curr.byteIdx
  decreasing_by
    simp at h ⊢
    omega
-- State of the iterator returned by `positions`: the position that will be yielded next.
structure PosIterator (s : Slice) where
  currPos : s.Pos
deriving Inhabited
-- Verso elaboration is switched off for this docstring; the file converts it later with
-- `docs_to_verso positions`.
set_option doc.verso false
/--
Creates an iterator over all valid positions within {name}`s`.

Examples:
 * {lean}`("abc".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', 'c']`
 * {lean}`("abc".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2]`
 * {lean}`("ab∀c".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', '∀', 'c']`
 * {lean}`("ab∀c".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2, 5]`
-/
def positions (s : Slice) : Std.Iter (α := PosIterator s) { p : s.Pos // p ≠ s.endPos } :=
  { internalState := { currPos := s.startPos }}

set_option doc.verso true
namespace PosIterator

instance [Pure m] :
    Std.Iterators.Iterator (PosIterator s) m { p : s.Pos // p ≠ s.endPos } where
  -- A step is plausible iff it yields the current position and advances to the next one, or
  -- it is `done` exactly at the end position. The iterator never skips.
  IsPlausibleStep it
    | .yield it' out =>
      ∃ h : it.internalState.currPos ≠ s.endPos,
        it'.internalState.currPos = it.internalState.currPos.next h ∧
        it.internalState.currPos = out
    | .skip _ => False
    | .done => it.internalState.currPos = s.endPos
  step := fun ⟨⟨currPos⟩⟩ =>
    if h : currPos = s.endPos then
      pure ⟨.done, by simp [h]⟩
    else
      pure ⟨.yield ⟨⟨currPos.next h⟩⟩ ⟨currPos, h⟩, by simp [h]⟩

-- Termination measure: the number of bytes between the current position and the end of the
-- slice, which strictly decreases on every `yield`.
private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (PosIterator s) m where
  rel := InvImage WellFoundedRelation.rel
      (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    simp_wf
    obtain ⟨step, h, h'⟩ := h
    cases step
    · cases h
      obtain ⟨h1, h2, _⟩ := h'
      have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1)
      have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize
      simp [Pos.ext_iff, String.Pos.Raw.ext_iff, Pos.Raw.le_iff] at h1 h2 h4
      omega
    · cases h'
    · cases h

@[no_expose]
instance [Pure m] : Std.Iterators.Finite (PosIterator s) m :=
  .of_finitenessRelation finitenessRelation

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (PosIterator s) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial (PosIterator s) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (PosIterator s) m n :=
  .defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (PosIterator s) m n :=
  .defaultImplementation

-- The docstring of `positions` was written with Verso disabled; convert it now that the
-- instances its examples rely on exist.
docs_to_verso positions

end PosIterator
/--
Creates an iterator over all characters (Unicode code points) in {name}`s`.

Examples:
 * {lean}`"abc".toSlice.chars.toList = ['a', 'b', 'c']`
 * {lean}`"ab∀c".toSlice.chars.toList = ['a', 'b', '∀', 'c']`
-/
@[expose, inline]
def chars (s : Slice) :=
  -- Every yielded position comes with a proof that it is not the end position, which is
  -- exactly what `get` needs.
  Std.Iterators.Iter.map (fun ⟨pos, h⟩ => pos.get h) (positions s)
-- Internal state of the iterator created by `revPositions`: the position the iterator is
-- currently at. Each step moves it to the previous position until the start position is reached.
structure RevPosIterator (s : Slice) where
  currPos : s.Pos
deriving Inhabited
|
||
set_option doc.verso false
|
||
/--
Creates an iterator over all valid positions within {name}`s`, starting from the last valid
position and iterating towards the first one.

Examples:
* {lean}`("abc".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', 'b', 'a']`
* {lean}`("abc".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [2, 1, 0]`
* {lean}`("ab∀c".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', '∀', 'b', 'a']`
* {lean}`("ab∀c".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [5, 2, 1, 0]`
-/
def revPositions (s : Slice) : Std.Iter (α := RevPosIterator s) { p : s.Pos // p ≠ s.endPos } :=
  { internalState := ⟨s.endPos⟩ }
|
||
set_option doc.verso true
|
||
|
||
namespace RevPosIterator

-- One step of the reverse position iterator: if the current position is the start position the
-- iterator is done; otherwise it moves to the previous position and yields that position.
instance [Pure m] :
    Std.Iterators.Iterator (RevPosIterator s) m { p : s.Pos // p ≠ s.endPos } where
  IsPlausibleStep it
    | .yield it' emitted =>
      ∃ hne : it.internalState.currPos ≠ s.startPos,
        it'.internalState.currPos = it.internalState.currPos.prev hne ∧
        it.internalState.currPos.prev hne = emitted
    | .skip _ => False
    | .done => it.internalState.currPos = s.startPos
  step := fun ⟨⟨pos⟩⟩ =>
    if hstart : pos = s.startPos then
      pure ⟨.done, by simp [hstart]⟩
    else
      let before := pos.prev hstart
      pure ⟨.yield ⟨⟨before⟩⟩ ⟨before, Pos.prev_ne_endPos⟩, by simp [hstart, before]⟩

-- Termination argument: the byte offset of the current position strictly decreases with every
-- `yield` step.
private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (RevPosIterator s) m where
  rel := InvImage WellFoundedRelation.rel
      (fun it => it.internalState.currPos.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} hrel := by
    simp_wf
    obtain ⟨step, hsucc, hplaus⟩ := hrel
    cases step
    -- `yield`: the successor sits at the previous position, whose offset is smaller.
    · cases hsucc
      obtain ⟨hne, hprev, _⟩ := hplaus
      have hlt := Pos.offset_prev_lt_offset (h := hne)
      simp [Pos.ext_iff, String.Pos.Raw.ext_iff] at hprev hlt
      omega
    -- `skip`: its plausibility predicate is `False`.
    · cases hplaus
    -- `done`: closed by case analysis on the successor hypothesis.
    · cases hsucc

@[no_expose]
instance [Pure m] : Std.Iterators.Finite (RevPosIterator s) m :=
  Std.Iterators.Finite.of_finitenessRelation finitenessRelation

-- Collecting and looping (total and partial variants) all use the library's default
-- implementations.
instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (RevPosIterator s) m n :=
  Std.Iterators.IteratorCollect.defaultImplementation

instance [Monad m] [Monad n] :
    Std.Iterators.IteratorCollectPartial (RevPosIterator s) m n :=
  Std.Iterators.IteratorCollectPartial.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (RevPosIterator s) m n :=
  Std.Iterators.IteratorLoop.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (RevPosIterator s) m n :=
  Std.Iterators.IteratorLoopPartial.defaultImplementation

docs_to_verso revPositions

end RevPosIterator
|
||
/--
Creates an iterator over all characters (Unicode code points) in {name}`s`, starting from the end
of the slice and iterating towards the start.

Examples:
* {lean}`"abc".toSlice.revChars.toList = ['c', 'b', 'a']`
* {lean}`"ab∀c".toSlice.revChars.toList = ['c', '∀', 'b', 'a']`
-/
@[expose, inline]
def revChars (s : Slice) :=
  revPositions s |> Std.Iterators.Iter.map (fun ⟨p, hp⟩ => p.get hp)
|
||
-- Internal state of the iterator created by `bytes`: the slice being traversed together with the
-- raw byte offset of the next byte to read.
structure ByteIterator where
  s : Slice
  offset : String.Pos.Raw
deriving Inhabited
|
||
set_option doc.verso false
|
||
/--
Creates an iterator over all bytes in {name}`s`.

Examples:
* {lean}`"abc".toSlice.bytes.toList = [97, 98, 99]`
* {lean}`"ab∀c".toSlice.bytes.toList = [97, 98, 226, 136, 128, 99]`
-/
def bytes (s : Slice) : Std.Iter (α := ByteIterator) UInt8 :=
  { internalState := { s := s, offset := s.startPos.offset } }
|
||
set_option doc.verso true
|
||
|
||
namespace ByteIterator

-- One step of the byte iterator: while the offset is below the byte size of the slice, yield the
-- byte at that offset and increment the offset; otherwise the iterator is done.
instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where
  IsPlausibleStep it
    | .yield it' byte =>
      ∃ hlt : it.internalState.offset < it.internalState.s.utf8ByteSize,
        it.internalState.s = it'.internalState.s ∧
        it'.internalState.offset = it.internalState.offset.inc ∧
        it.internalState.s.getUTF8Byte it.internalState.offset hlt = byte
    | .skip _ => False
    | .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize
  step := fun ⟨s, offset⟩ =>
    if hlt : offset < s.utf8ByteSize then
      pure ⟨.yield ⟨s, offset.inc⟩ (s.getUTF8Byte offset hlt), by simp [hlt]⟩
    else
      pure ⟨.done, by simp [hlt]⟩

-- Termination argument: the number of bytes between the offset and the byte size of the slice
-- strictly decreases with every `yield` step.
private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (ByteIterator) m where
  rel := InvImage WellFoundedRelation.rel
      (fun it => it.internalState.s.utf8ByteSize.byteIdx - it.internalState.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} hrel := by
    simp_wf
    obtain ⟨step, hsucc, hplaus⟩ := hrel
    cases step
    -- `yield`: both states share the same slice and the offset was incremented.
    · cases hsucc
      obtain ⟨hlt, hslice, hoff, hbyte⟩ := hplaus
      -- The yielded byte plays no role in the termination argument.
      clear hbyte
      generalize it'.internalState.s = slice at *
      cases hslice
      simp [String.Pos.Raw.ext_iff] at hlt hoff
      omega
    -- `skip`: its plausibility predicate is `False`.
    · cases hplaus
    -- `done`: closed by case analysis on the successor hypothesis.
    · cases hsucc

@[no_expose]
instance [Pure m] : Std.Iterators.Finite ByteIterator m :=
  Std.Iterators.Finite.of_finitenessRelation finitenessRelation

-- Collecting and looping (total and partial variants) all use the library's default
-- implementations.
instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect ByteIterator m n :=
  Std.Iterators.IteratorCollect.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial ByteIterator m n :=
  Std.Iterators.IteratorCollectPartial.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop ByteIterator m n :=
  Std.Iterators.IteratorLoop.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial ByteIterator m n :=
  Std.Iterators.IteratorLoopPartial.defaultImplementation

docs_to_verso bytes

end ByteIterator
|
||
-- Internal state of the iterator created by `revBytes`: the slice being traversed, a raw byte
-- offset, and the invariant that the offset does not exceed the byte size of the slice. The byte
-- read by a step is the one directly before `offset`.
structure RevByteIterator where
  s : Slice
  offset : String.Pos.Raw
  hinv : offset ≤ s.utf8ByteSize
|
||
set_option doc.verso false
|
||
/--
Creates an iterator over all bytes in {name}`s`, starting from the last one and iterating towards
the first one.

Examples:
* {lean}`"abc".toSlice.revBytes.toList = [99, 98, 97]`
* {lean}`"ab∀c".toSlice.revBytes.toList = [99, 128, 136, 226, 98, 97]`
-/
def revBytes (s : Slice) : Std.Iter (α := RevByteIterator) UInt8 :=
  { internalState := { s := s, offset := s.endPos.offset, hinv := by simp } }
|
||
set_option doc.verso true
|
||
|
||
-- `RevByteIterator` carries a proof field, so the default value is written by hand: the default
-- slice with the offset placed at its end position.
instance : Inhabited RevByteIterator where
  default :=
    let dflt : Slice := default
    { s := dflt, offset := dflt.endPos.offset, hinv := by simp }
|
||
namespace RevByteIterator

-- One step of the reverse byte iterator: while the offset is nonzero, decrement it and yield the
-- byte at the decremented offset; once the offset is zero the iterator is done.
instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where
  IsPlausibleStep it
    | .yield it' byte =>
      ∃ hlt : it.internalState.offset.dec < it.internalState.s.utf8ByteSize,
        it.internalState.s = it'.internalState.s ∧
        it.internalState.offset ≠ 0 ∧
        it'.internalState.offset = it.internalState.offset.dec ∧
        it.internalState.s.getUTF8Byte it.internalState.offset.dec hlt = byte
    | .skip _ => False
    | .done => it.internalState.offset = 0
  step := fun ⟨s, offset, hinv⟩ =>
    if hne : offset ≠ 0 then
      let prevOffset := offset.dec
      -- The decremented offset is a valid byte index into the slice.
      have hlt := by
        simp [String.Pos.Raw.le_iff, prevOffset] at hne hinv ⊢
        omega
      -- The structure invariant still holds for the decremented offset.
      have hinv' := by
        simp [String.Pos.Raw.le_iff, prevOffset] at hinv ⊢
        omega
      have hplaus := by simp [prevOffset, hlt, hne]
      pure ⟨.yield ⟨s, prevOffset, hinv'⟩ (s.getUTF8Byte prevOffset hlt), hplaus⟩
    else
      pure ⟨.done, by simpa using hne⟩

-- Termination argument: the byte offset strictly decreases with every `yield` step.
private def finitenessRelation [Pure m] :
    Std.Iterators.FinitenessRelation (RevByteIterator) m where
  rel := InvImage WellFoundedRelation.rel
      (fun it => it.internalState.offset.byteIdx)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} hrel := by
    simp_wf
    obtain ⟨step, hsucc, hplaus⟩ := hrel
    cases step
    -- `yield`: the successor offset is the decrement of a nonzero offset.
    · cases hsucc
      obtain ⟨hlt, hslice, hne, hoff, hbyte⟩ := hplaus
      rw [hoff]
      simp at hlt hne ⊢
      omega
    -- `skip`: its plausibility predicate is `False`.
    · cases hplaus
    -- `done`: closed by case analysis on the successor hypothesis.
    · cases hsucc

@[no_expose]
instance [Pure m] : Std.Iterators.Finite RevByteIterator m :=
  Std.Iterators.Finite.of_finitenessRelation finitenessRelation

-- Collecting and looping (total and partial variants) all use the library's default
-- implementations.
instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect RevByteIterator m n :=
  Std.Iterators.IteratorCollect.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial RevByteIterator m n :=
  Std.Iterators.IteratorCollectPartial.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop RevByteIterator m n :=
  Std.Iterators.IteratorLoop.defaultImplementation

instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial RevByteIterator m n :=
  Std.Iterators.IteratorLoopPartial.defaultImplementation

docs_to_verso revBytes

end RevByteIterator
|
||
-- Strips one trailing line terminator from `s`. If `s` ends in a line feed, the line feed is
-- dropped, and a carriage return directly in front of it is dropped as well. A slice that does not
-- end in a line feed is returned unchanged.
def lines.lineMap (s : Slice) : Slice :=
  match s.dropSuffix? '\n' with
  | none => s
  | some withoutLf => (withoutLf.dropSuffix? '\r').getD withoutLf
|
||
/--
Creates an iterator over all lines in {name}`s`. The line ending characters `\r\n` or `\n` are
stripped from each line.

Examples:
* {lean}`"foo\r\nbar\n\nbaz\n".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice]`
* {lean}`"foo\r\nbar\n\nbaz".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice]`
* {lean}`"foo\r\nbar\n\nbaz\r".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz\r".toSlice]`
-/
def lines (s : Slice) :=
  (s.splitInclusive '\n').map lines.lineMap
|
||
/--
Folds a function over a slice from the start. The accumulator starts out as {name}`init` and is
combined with each character of the slice in order, using {name}`f`.

Examples:
* {lean}`"coffee tea water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 2`
* {lean}`"coffee tea and water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 3`
* {lean}`"coffee tea water".toSlice.foldl (·.push ·) "" = "coffee tea water"`
-/
@[inline]
def foldl {α : Type u} (f : α → Char → α) (init : α) (s : Slice) : α :=
  chars s |> Std.Iterators.Iter.fold f init
|
||
/--
Folds a function over a slice from the end. The accumulator starts out as {name}`init` and is
combined with each character of the slice in reverse order, using {name}`f`.

Examples:
* {lean}`"coffee tea water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 2`
* {lean}`"coffee tea and water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 3`
* {lean}`"coffee tea water".toSlice.foldr (fun c s => s.push c) "" = "retaw aet eeffoc"`
-/
@[inline]
def foldr {α : Type u} (f : Char → α → α) (init : α) (s : Slice) : α :=
  revChars s |> Std.Iterators.Iter.fold (flip f) init
|
||
/--
Checks whether the slice can be interpreted as the decimal representation of a natural number.

This is the case exactly when the slice is not empty and every character in it is a digit.

Use {name (scope := "Init.Data.String.Slice")}`toNat?` or
{name (scope := "Init.Data.String.Slice")}`toNat!` to convert such a slice to a natural number.

Examples:
* {lean}`"".toSlice.isNat = false`
* {lean}`"0".toSlice.isNat = true`
* {lean}`"5".toSlice.isNat = true`
* {lean}`"05".toSlice.isNat = true`
* {lean}`"587".toSlice.isNat = true`
* {lean}`"-587".toSlice.isNat = false`
* {lean}`" 5".toSlice.isNat = false`
* {lean}`"2+3".toSlice.isNat = false`
* {lean}`"0xff".toSlice.isNat = false`
-/
@[inline]
def isNat (s : Slice) : Bool :=
  not s.isEmpty && s.all Char.isDigit
|
||
/--
Interprets a slice as the decimal representation of a natural number and returns that number.
Returns {name}`none` if the slice does not contain a decimal natural number.

A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.

Use {name}`isNat` to check whether {name}`toNat?` would return {name}`some`.
{name (scope := "Init.Data.String.Slice")}`toNat!` is an alternative that panics instead of
returning {name}`none` when the slice is not a natural number.

Examples:
* {lean}`"".toSlice.toNat? = none`
* {lean}`"0".toSlice.toNat? = some 0`
* {lean}`"5".toSlice.toNat? = some 5`
* {lean}`"587".toSlice.toNat? = some 587`
* {lean}`"-587".toSlice.toNat? = none`
* {lean}`" 5".toSlice.toNat? = none`
* {lean}`"2+3".toSlice.toNat? = none`
* {lean}`"0xff".toSlice.toNat? = none`
-/
def toNat? (s : Slice) : Option Nat :=
  if s.isNat then
    some (s.foldl (fun acc digit => acc * 10 + (digit.toNat - '0'.toNat)) 0)
  else
    none
|
||
/--
Interprets a slice as the decimal representation of a natural number and returns that number.
Panics if the slice does not contain a decimal natural number.

A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.

Use {name}`isNat` to check whether {name}`toNat!` would return a value. {name}`toNat?` is a safer
alternative that returns {name}`none` instead of panicking when the slice is not a natural number.

Examples:
* {lean}`"0".toSlice.toNat! = 0`
* {lean}`"5".toSlice.toNat! = 5`
* {lean}`"587".toSlice.toNat! = 587`
-/
def toNat! (s : Slice) : Nat :=
  if s.isNat then
    s.foldl (fun acc digit => acc * 10 + (digit.toNat - '0'.toNat)) 0
  else
    panic! "Nat expected"
|
||
/--
Returns the first character in {name}`s`. If {name}`s` is empty, returns {name}`none`.

Examples:
* {lean}`"abc".toSlice.front? = some 'a'`
* {lean}`"".toSlice.front? = none`
-/
@[inline]
def front? (s : Slice) : Option Char :=
  s.startPos |>.get?
|
||
/--
Returns the first character in {name}`s`. If {name}`s` is empty, the result is
{lean}`(default : Char)`.

Examples:
* {lean}`"abc".toSlice.front = 'a'`
* {lean}`"".toSlice.front = (default : Char)`
-/
@[inline]
def front (s : Slice) : Char :=
  s.front? |>.getD default
|
||
/--
Returns the last character in {name}`s`. If {name}`s` is empty, the result is {name}`none`.

Examples:
* {lean}`"abc".toSlice.back? = some 'c'`
* {lean}`"".toSlice.back? = none`
-/
@[inline]
def back? (s : Slice) : Option Char :=
  s.endPos.prev?.bind (fun p => p.get?)
|
||
/--
Returns the last character in {name}`s`. If {name}`s` is empty, the result is
{lean}`(default : Char)`.

Examples:
* {lean}`"abc".toSlice.back = 'c'`
* {lean}`"".toSlice.back = (default : Char)`
-/
@[inline]
def back (s : Slice) : Char :=
  s.back? |>.getD default
|
||
end String.Slice
|