From 5fd8c1b94d095cbee4fbbcc885f31a239cdec07e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Thu, 25 Sep 2025 14:18:52 +0200 Subject: [PATCH] feat: new String.Slice API (#10514) This PR defines the new `String.Slice` API. Many of the core design principles of the API are taken over from Rust's [string library](https://doc.rust-lang.org/stable/std/string/struct.String.html). --- src/Init/Data/String.lean | 2 + src/Init/Data/String/Pattern.lean | 12 + src/Init/Data/String/Pattern/Basic.lean | 221 ++++ src/Init/Data/String/Pattern/Char.lean | 160 +++ src/Init/Data/String/Pattern/Pred.lean | 162 +++ src/Init/Data/String/Pattern/String.lean | 273 +++++ src/Init/Data/String/Slice.lean | 1201 ++++++++++++++++++++++ src/Init/Data/UInt/Basic.lean | 8 + src/include/lean/lean.h | 3 + src/runtime/object.cpp | 39 + tests/lean/run/string_slice.lean | 206 ++++ 11 files changed, 2287 insertions(+) create mode 100644 src/Init/Data/String/Pattern.lean create mode 100644 src/Init/Data/String/Pattern/Basic.lean create mode 100644 src/Init/Data/String/Pattern/Char.lean create mode 100644 src/Init/Data/String/Pattern/Pred.lean create mode 100644 src/Init/Data/String/Pattern/String.lean create mode 100644 src/Init/Data/String/Slice.lean create mode 100644 tests/lean/run/string_slice.lean diff --git a/src/Init/Data/String.lean b/src/Init/Data/String.lean index c4c104ce89..d509b062c2 100644 --- a/src/Init/Data/String.lean +++ b/src/Init/Data/String.lean @@ -13,5 +13,7 @@ public import Init.Data.String.Extra public import Init.Data.String.Lemmas public import Init.Data.String.Repr public import Init.Data.String.Bootstrap +public import Init.Data.String.Slice +public import Init.Data.String.Pattern public section diff --git a/src/Init/Data/String/Pattern.lean b/src/Init/Data/String/Pattern.lean new file mode 100644 index 0000000000..6c1451b9c6 --- /dev/null +++ b/src/Init/Data/String/Pattern.lean @@ -0,0 +1,12 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Pattern.Basic +public import Init.Data.String.Pattern.Char +public import Init.Data.String.Pattern.String +public import Init.Data.String.Pattern.Pred diff --git a/src/Init/Data/String/Pattern/Basic.lean b/src/Init/Data/String/Pattern/Basic.lean new file mode 100644 index 0000000000..4a9983c1d9 --- /dev/null +++ b/src/Init/Data/String/Pattern/Basic.lean @@ -0,0 +1,221 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Basic +public import Init.Data.Iterators.Basic + +set_option doc.verso true + +/-! +This module defines the notion of patterns which is central to the {name}`String.Slice` and +{name}`String` API. All functions on {name}`String.Slice` and {name}`String` that +"search for something" are polymorphic over a pattern instead of taking just one particular kind +of pattern such as a {name}`Char`. The key components are: +- {name (scope := "Init.Data.String.Pattern.Basic")}`ToForwardSearcher` +- {name (scope := "Init.Data.String.Pattern.Basic")}`ForwardPattern` +- {name (scope := "Init.Data.String.Pattern.Basic")}`ToBackwardSearcher` +- {name (scope := "Init.Data.String.Pattern.Basic")}`SuffixPattern` +-/ + +public section + +namespace String.Slice.Pattern + +/-- +A step taken during the traversal of a {name}`Slice` by a forward or backward searcher. +-/ +inductive SearchStep (s : Slice) where + /-- + The subslice starting at {name}`startPos` and ending at {name}`endPos` did not match the pattern. + -/ + | rejected (startPos endPos : s.Pos) + /-- + The subslice starting at {name}`startPos` and ending at {name}`endPos` did not match the pattern. + -/ + | matched (startPos endPos : s.Pos) +deriving Inhabited + +/-- +Provides a conversion from a pattern to an iterator of {name}`SearchStep` searching for matches of +the pattern from the start towards the end of a {name}`Slice`. +-/ +class ToForwardSearcher (ρ : Type) (σ : outParam (Slice → Type)) where + /-- + Build an iterator of {name}`SearchStep` corresponding to matches of {name}`pat` along the slice + {name}`s`. The {name}`SearchStep`s returned by this iterator must contain ranges that are + adjacent, non-overlapping and cover all of {name}`s`. + -/ + toSearcher : (s : Slice) → (pat : ρ) → Std.Iter (α := σ s) (SearchStep s) + +/-- +Provides simple pattern matching capabilities from the start of a {name}`Slice`. + +While these operations can be implemented on top of {name}`ToForwardSearcher` some patterns allow +for more efficient implementations so this class can be used to specialise for them. If there is no +need to specialise in this fashion +{name (scope := "Init.Data.String.Pattern.Basic")}`ForwardPattern.defaultImplementation` can be used +to automatically derive an instance. +-/ +class ForwardPattern (ρ : Type) where + /-- + Checks whether the slice starts with the pattern. + -/ + startsWith : Slice → ρ → Bool + /-- + Checks whether the slice starts with the pattern, if it does return slice with the prefix removed, + otherwise {name}`none`. + -/ + dropPrefix? : Slice → ρ → Option Slice + +namespace Internal + +@[extern "lean_slice_memcmp"] +def memcmp (lhs rhs : @& Slice) (lstart : @& String.Pos) (rstart : @& String.Pos) + (len : @& String.Pos) (h1 : lstart + len ≤ lhs.utf8ByteSize) + (h2 : rstart + len ≤ rhs.utf8ByteSize) : Bool := + go 0 +where + go (curr : String.Pos) : Bool := + if h : curr < len then + have hl := by + simp [Pos.le_iff] at h h1 ⊢ + omega + have hr := by + simp [Pos.le_iff] at h h2 ⊢ + omega + if lhs.getUtf8Byte (lstart + curr) hl == rhs.getUtf8Byte (rstart + curr) hr then + go curr.inc + else + false + else + true + termination_by len.byteIdx - curr.byteIdx + decreasing_by + simp at h ⊢ + omega + +variable {ρ : Type} {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [∀ s, Std.Iterators.Finite (σ s) Id] + +/-- +Tries to skip the {name}`searcher` until the next {name}`SearchStep.matched` and return it. If no +match is found until the end returns {name}`none`. +-/ +@[inline] +def nextMatch (searcher : Std.Iter (α := σ s) (SearchStep s)) : + Option (Std.Iter (α := σ s) (SearchStep s) × s.Pos × s.Pos) := + go searcher +where + go [∀ s, Std.Iterators.Finite (σ s) Id] (searcher : Std.Iter (α := σ s) (SearchStep s)) : + Option (Std.Iter (α := σ s) (SearchStep s) × s.Pos × s.Pos) := + match searcher.step with + | .yield it (.matched startPos endPos) _ => some (it, startPos, endPos) + | .yield it (.rejected ..) _ | .skip it .. => go it + | .done .. => none + termination_by Std.Iterators.Iter.finitelyManySteps searcher + +/-- +Tries to skip the {name}`searcher` until the next {name}`SearchStep.rejected` and return it. If no +reject is found until the end returns {name}`none`. +-/ +@[inline] +def nextReject (searcher : Std.Iter (α := σ s) (SearchStep s)) : + Option (Std.Iter (α := σ s) (SearchStep s) × s.Pos × s.Pos) := + go searcher +where + go [∀ s, Std.Iterators.Finite (σ s) Id] (searcher : Std.Iter (α := σ s) (SearchStep s)) : + Option (Std.Iter (α := σ s) (SearchStep s) × s.Pos × s.Pos) := + match searcher.step with + | .yield it (.rejected startPos endPos) _ => some (it, startPos, endPos) + | .yield it (.matched ..) _ | .skip it .. => go it + | .done .. => none + termination_by Std.Iterators.Iter.finitelyManySteps searcher + +end Internal + +namespace ForwardPattern + +variable {ρ : Type} {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [ToForwardSearcher ρ σ] + +@[specialize pat] +def defaultStartsWith (s : Slice) (pat : ρ) : Bool := + let searcher := ToForwardSearcher.toSearcher s pat + match searcher.step with + | .yield _ (.matched start ..) _ => s.startPos = start + | _ => false + +@[specialize pat] +def defaultDropPrefix? (s : Slice) (pat : ρ) : Option Slice := + let searcher := ToForwardSearcher.toSearcher s pat + match searcher.step with + | .yield _ (.matched _ endPos) _ => some (s.replaceStart endPos) + | _ => none + +@[always_inline, inline] +def defaultImplementation : ForwardPattern ρ where + startsWith := defaultStartsWith + dropPrefix? := defaultDropPrefix? + +end ForwardPattern + +/-- +Provides a conversion from a pattern to an iterator of {name}`SearchStep` searching for matches of +the pattern from the end towards the start of a {name}`Slice`. +-/ +class ToBackwardSearcher (ρ : Type) (σ : outParam (Slice → Type)) where + /-- + Build an iterator of {name}`SearchStep` corresponding to matches of {lean}`pat` along the slice + {name}`s`. The {name}`SearchStep`s returned by this iterator must contain ranges that are + adjacent, non-overlapping and cover all of {name}`s`. + -/ + toSearcher : (s : Slice) → (pat : ρ) → Std.Iter (α := σ s) (SearchStep s) + +/-- +Provides simple pattern matching capabilities from the end of a {name}`Slice`. + +While these operations can be implemented on top of {name}`ToBackwardSearcher` some patterns allow +for more efficient implementations so this class can be used to specialise for them. If there is no +need to specialise in this fashion +{name (scope := "Init.Data.String.Pattern.Basic")}`BackwardPattern.defaultImplementation` can be +used to automatically derive an instance. +-/ +class BackwardPattern (ρ : Type) where + endsWith : Slice → ρ → Bool + dropSuffix? : Slice → ρ → Option Slice + +namespace ToBackwardSearcher + +variable {ρ : Type} {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [ToBackwardSearcher ρ σ] + +@[specialize pat] +def defaultEndsWith (s : Slice) (pat : ρ) : Bool := + let searcher := ToBackwardSearcher.toSearcher s pat + match searcher.step with + | .yield _ (.matched _ endPos) _ => s.endPos = endPos + | _ => false + +@[specialize pat] +def defaultDropSuffix? (s : Slice) (pat : ρ) : Option Slice := + let searcher := ToBackwardSearcher.toSearcher s pat + match searcher.step with + | .yield _ (.matched startPos _) _ => some (s.replaceEnd startPos) + | _ => none + +@[always_inline, inline] +def defaultImplementation : BackwardPattern ρ where + endsWith := defaultEndsWith + dropSuffix? := defaultDropSuffix? + +end ToBackwardSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Pattern/Char.lean b/src/Init/Data/String/Pattern/Char.lean new file mode 100644 index 0000000000..11e56ff7a4 --- /dev/null +++ b/src/Init/Data/String/Pattern/Char.lean @@ -0,0 +1,160 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Pattern.Basic +public import Init.Data.Iterators.Internal.Termination +public import Init.Data.Iterators.Consumers.Monadic.Loop + +set_option doc.verso true + +/-! +This module defines the necessary instances to register {name}`Char` with the pattern framework. +-/ + +public section + +namespace String.Slice.Pattern + +structure ForwardCharSearcher (s : Slice) where + currPos : s.Pos + needle : Char +deriving Inhabited + +namespace ForwardCharSearcher + +@[inline] +def iter (s : Slice) (c : Char) : Std.Iter (α := ForwardCharSearcher s) (SearchStep s) := + { internalState := { currPos := s.startPos, needle := c }} + +instance (s : Slice) : Std.Iterators.Iterator (ForwardCharSearcher s) Id (SearchStep s) where + IsPlausibleStep it + | .yield it' out => + it.internalState.needle = it'.internalState.needle ∧ + ∃ h1 : it.internalState.currPos ≠ s.endPos, + it'.internalState.currPos = it.internalState.currPos.next h1 ∧ + match out with + | .matched startPos endPos => + it.internalState.currPos = startPos ∧ + it'.internalState.currPos = endPos ∧ + it.internalState.currPos.get h1 = it.internalState.needle + | .rejected startPos endPos => + it.internalState.currPos = startPos ∧ + it'.internalState.currPos = endPos ∧ + it.internalState.currPos.get h1 ≠ it.internalState.needle + | .skip _ => False + | .done => it.internalState.currPos = s.endPos + step := fun ⟨currPos, needle⟩ => + if h1 : currPos = s.endPos then + pure ⟨.done, by simp [h1]⟩ + else + let nextPos := currPos.next h1 + let nextIt := ⟨nextPos, needle⟩ + if h2 : currPos.get h1 = needle then + pure ⟨.yield nextIt (.matched currPos nextPos), by simp [h1, h2, nextIt, nextPos]⟩ + else + pure ⟨.yield nextIt (.rejected currPos nextPos), by simp [h1, h2, nextIt, nextPos]⟩ + +def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharSearcher s) Id where + rel := InvImage WellFoundedRelation.rel + (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨_, h1, h2, _⟩ := h' + have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) + have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize + simp [Pos.ext_iff, String.Pos.ext_iff, Pos.le_iff] at h1 h2 h4 + omega + · cases h' + · cases h + +instance : Std.Iterators.Finite (ForwardCharSearcher s) Id := + .of_finitenessRelation finitenessRelation + +instance : Std.Iterators.IteratorLoop (ForwardCharSearcher s) Id Id := + .defaultImplementation + +instance : ToForwardSearcher Char ForwardCharSearcher where + toSearcher := iter + +instance : ForwardPattern Char := .defaultImplementation + +end ForwardCharSearcher + +structure BackwardCharSearcher (s : Slice) where + currPos : s.Pos + needle : Char +deriving Inhabited + +namespace BackwardCharSearcher + +@[inline] +def iter (s : Slice) (c : Char) : Std.Iter (α := BackwardCharSearcher s) (SearchStep s) := + { internalState := { currPos := s.endPos, needle := c }} + +instance (s : Slice) : Std.Iterators.Iterator (BackwardCharSearcher s) Id (SearchStep s) where + IsPlausibleStep it + | .yield it' out => + it.internalState.needle = it'.internalState.needle ∧ + ∃ h1 : it.internalState.currPos ≠ s.startPos, + it'.internalState.currPos = it.internalState.currPos.prev h1 ∧ + match out with + | .matched startPos endPos => + it.internalState.currPos = endPos ∧ + it'.internalState.currPos = startPos ∧ + (it.internalState.currPos.prev h1).get Pos.prev_ne_endPos = it.internalState.needle + | .rejected startPos endPos => + it.internalState.currPos = endPos ∧ + it'.internalState.currPos = startPos ∧ + (it.internalState.currPos.prev h1).get Pos.prev_ne_endPos ≠ it.internalState.needle + | .skip _ => False + | .done => it.internalState.currPos = s.startPos + step := fun ⟨currPos, needle⟩ => + if h1 : currPos = s.startPos then + pure ⟨.done, by simp [h1]⟩ + else + let nextPos := currPos.prev h1 + let nextIt := ⟨nextPos, needle⟩ + if h2 : nextPos.get Pos.prev_ne_endPos = needle then + pure ⟨.yield nextIt (.matched nextPos currPos), by simp [h1, h2, nextIt, nextPos]⟩ + else + pure ⟨.yield nextIt (.rejected nextPos currPos), by simp [h1, h2, nextIt, nextPos]⟩ + +def finitenessRelation : Std.Iterators.FinitenessRelation (BackwardCharSearcher s) Id where + rel := InvImage WellFoundedRelation.rel + (fun it => it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨_, h1, h2, _⟩ := h' + have h3 := Pos.offset_prev_lt_offset (h := h1) + simp [Pos.ext_iff, String.Pos.ext_iff] at h2 h3 + omega + · cases h' + · cases h + +instance : Std.Iterators.Finite (BackwardCharSearcher s) Id := + .of_finitenessRelation finitenessRelation + +instance : Std.Iterators.IteratorLoop (BackwardCharSearcher s) Id Id := + .defaultImplementation + +instance : ToBackwardSearcher Char BackwardCharSearcher where + toSearcher := iter + +instance : BackwardPattern Char := ToBackwardSearcher.defaultImplementation + +end BackwardCharSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Pattern/Pred.lean b/src/Init/Data/String/Pattern/Pred.lean new file mode 100644 index 0000000000..79bbfe37c4 --- /dev/null +++ b/src/Init/Data/String/Pattern/Pred.lean @@ -0,0 +1,162 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Pattern.Basic +public import Init.Data.Iterators.Internal.Termination +public import Init.Data.Iterators.Consumers.Monadic.Loop + +set_option doc.verso true + +/-! +This module defines the necessary instances to register {lean}`Char → Bool` with the pattern +framework. +-/ + +public section + +namespace String.Slice.Pattern + +structure ForwardCharPredSearcher (s : Slice) where + currPos : s.Pos + needle : Char → Bool +deriving Inhabited + +namespace ForwardCharPredSearcher + +@[inline] +def iter (s : Slice) (p : Char → Bool) : Std.Iter (α := ForwardCharPredSearcher s) (SearchStep s) := + { internalState := { currPos := s.startPos, needle := p }} + +instance (s : Slice) : Std.Iterators.Iterator (ForwardCharPredSearcher s) Id (SearchStep s) where + IsPlausibleStep it + | .yield it' out => + it.internalState.needle = it'.internalState.needle ∧ + ∃ h1 : it.internalState.currPos ≠ s.endPos, + it'.internalState.currPos = it.internalState.currPos.next h1 ∧ + match out with + | .matched startPos endPos => + it.internalState.currPos = startPos ∧ + it'.internalState.currPos = endPos ∧ + it.internalState.needle (it.internalState.currPos.get h1) + | .rejected startPos endPos => + it.internalState.currPos = startPos ∧ + it'.internalState.currPos = endPos ∧ + ¬ it.internalState.needle (it.internalState.currPos.get h1) + | .skip _ => False + | .done => it.internalState.currPos = s.endPos + step := fun ⟨currPos, needle⟩ => + if h1 : currPos = s.endPos then + pure ⟨.done, by simp [h1]⟩ + else + let nextPos := currPos.next h1 + let nextIt := ⟨nextPos, needle⟩ + if h2 : needle <| currPos.get h1 then + pure ⟨.yield nextIt (.matched currPos nextPos), by simp [h1, h2, nextPos, nextIt]⟩ + else + pure ⟨.yield nextIt (.rejected currPos nextPos), by simp [h1, h2, nextPos, nextIt]⟩ + + +def finitenessRelation : Std.Iterators.FinitenessRelation (ForwardCharPredSearcher s) Id where + rel := InvImage WellFoundedRelation.rel + (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨_, h1, h2, _⟩ := h' + have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) + have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize + simp [Pos.ext_iff, String.Pos.ext_iff, Pos.le_iff] at h1 h2 h4 + omega + · cases h' + · cases h + +instance : Std.Iterators.Finite (ForwardCharPredSearcher s) Id := + .of_finitenessRelation finitenessRelation + +instance : Std.Iterators.IteratorLoop (ForwardCharPredSearcher s) Id Id := + .defaultImplementation + +instance : ToForwardSearcher (Char → Bool) ForwardCharPredSearcher where + toSearcher := iter + +instance : ForwardPattern (Char → Bool) := .defaultImplementation + +end ForwardCharPredSearcher + +structure BackwardCharPredSearcher (s : Slice) where + currPos : s.Pos + needle : Char → Bool +deriving Inhabited + +namespace BackwardCharPredSearcher + +@[inline] +def iter (s : Slice) (c : Char → Bool) : Std.Iter (α := BackwardCharPredSearcher s) (SearchStep s) := + { internalState := { currPos := s.endPos, needle := c }} + +instance (s : Slice) : Std.Iterators.Iterator (BackwardCharPredSearcher s) Id (SearchStep s) where + IsPlausibleStep it + | .yield it' out => + it.internalState.needle = it'.internalState.needle ∧ + ∃ h1 : it.internalState.currPos ≠ s.startPos, + it'.internalState.currPos = it.internalState.currPos.prev h1 ∧ + match out with + | .matched startPos endPos => + it.internalState.currPos = endPos ∧ + it'.internalState.currPos = startPos ∧ + it.internalState.needle ((it.internalState.currPos.prev h1).get Pos.prev_ne_endPos) + | .rejected startPos endPos => + it.internalState.currPos = endPos ∧ + it'.internalState.currPos = startPos ∧ + ¬ it.internalState.needle ((it.internalState.currPos.prev h1).get Pos.prev_ne_endPos) + | .skip _ => False + | .done => it.internalState.currPos = s.startPos + step := fun ⟨currPos, needle⟩ => + if h1 : currPos = s.startPos then + pure ⟨.done, by simp [h1]⟩ + else + let nextPos := currPos.prev h1 + let nextIt := ⟨nextPos, needle⟩ + if h2 : needle <| nextPos.get Pos.prev_ne_endPos then + pure ⟨.yield nextIt (.matched nextPos currPos), by simp [h1, h2, nextIt, nextPos]⟩ + else + pure ⟨.yield nextIt (.rejected nextPos currPos), by simp [h1, h2, nextIt, nextPos]⟩ + +def finitenessRelation : Std.Iterators.FinitenessRelation (BackwardCharPredSearcher s) Id where + rel := InvImage WellFoundedRelation.rel + (fun it => it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨_, h1, h2, _⟩ := h' + have h3 := Pos.offset_prev_lt_offset (h := h1) + simp [Pos.ext_iff, String.Pos.ext_iff] at h2 h3 + omega + · cases h' + · cases h + +instance : Std.Iterators.Finite (BackwardCharPredSearcher s) Id := + .of_finitenessRelation finitenessRelation + +instance : Std.Iterators.IteratorLoop (BackwardCharPredSearcher s) Id Id := + .defaultImplementation + +instance : ToBackwardSearcher (Char → Bool) BackwardCharPredSearcher where + toSearcher := iter + +instance : BackwardPattern (Char → Bool) := ToBackwardSearcher.defaultImplementation + +end BackwardCharPredSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Pattern/String.lean b/src/Init/Data/String/Pattern/String.lean new file mode 100644 index 0000000000..f4dbc11b14 --- /dev/null +++ b/src/Init/Data/String/Pattern/String.lean @@ -0,0 +1,273 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Pattern.Basic +public import Init.Data.Iterators.Internal.Termination +public import Init.Data.Iterators.Consumers.Monadic.Loop + +set_option doc.verso true + +/-! +This module defines the necessary instances to register {name}`String` and {name}`String.Slice` +with the pattern framework. +-/ + +public section + +namespace String.Slice.Pattern + +inductive ForwardSliceSearcher (s : Slice) where + | empty (pos : s.Pos) + | proper (needle : Slice) (table : Array String.Pos) (stackPos : String.Pos) (needlePos : String.Pos) + | atEnd +deriving Inhabited + +namespace ForwardSliceSearcher + +partial def buildTable (pat : Slice) : Array String.Pos := + if pat.utf8ByteSize == 0 then + #[] + else + let arr := Array.emptyWithCapacity pat.utf8ByteSize.byteIdx + let arr := arr.push 0 + go ⟨1⟩ arr +where + go (pos : String.Pos) (table : Array String.Pos) := + if h : pos < pat.utf8ByteSize then + let patByte := pat.getUtf8Byte pos h + let distance := computeDistance table[table.size - 1]! patByte table + let distance := if patByte = pat.getUtf8Byte! distance then distance.inc else distance + go pos.inc (table.push distance) + else + table + + computeDistance (distance : String.Pos) (patByte : UInt8) (table : Array String.Pos) : + String.Pos := + if distance > 0 && patByte != pat.getUtf8Byte! distance then + computeDistance table[distance.byteIdx - 1]! patByte table + else + distance + +@[inline] +def iter (s : Slice) (pat : Slice) : Std.Iter (α := ForwardSliceSearcher s) (SearchStep s) := + if pat.utf8ByteSize == 0 then + { internalState := .empty s.startPos } + else + { internalState := .proper pat (buildTable pat) s.startPos.offset pat.startPos.offset } + +partial def backtrackIfNecessary (pat : Slice) (table : Array String.Pos) (stackByte : UInt8) + (needlePos : String.Pos) : String.Pos := + if needlePos != 0 && stackByte != pat.getUtf8Byte! needlePos then + backtrackIfNecessary pat table stackByte table[needlePos.byteIdx - 1]! + else + needlePos + +instance (s : Slice) : Std.Iterators.Iterator (ForwardSliceSearcher s) Id (SearchStep s) where + IsPlausibleStep it + | .yield it' out => + match it.internalState with + | .empty pos => + (∃ newPos, pos.offset < newPos.offset ∧ it'.internalState = .empty newPos) ∨ + it'.internalState = .atEnd + | .proper needle table stackPos needlePos => + (∃ newStackPos newNeedlePos, + stackPos < newStackPos ∧ + newStackPos ≤ s.utf8ByteSize ∧ + it'.internalState = .proper needle table newStackPos newNeedlePos) ∨ + it'.internalState = .atEnd + | .atEnd => False + | .skip _ => False + | .done => True + step := fun ⟨iter⟩ => + match iter with + | .empty pos => + let res := .matched pos pos + if h : pos ≠ s.endPos then + pure ⟨.yield ⟨.empty (pos.next h)⟩ res, by simp [Char.utf8Size_pos]⟩ + else + pure ⟨.yield ⟨.atEnd⟩ res, by simp⟩ + | .proper needle table stackPos needlePos => + let rec findNext (startPos : String.Pos) + (currStackPos : String.Pos) (needlePos : String.Pos) (h : stackPos ≤ currStackPos) := + if h1 : currStackPos < s.utf8ByteSize then + let stackByte := s.getUtf8Byte currStackPos h1 + let needlePos := backtrackIfNecessary needle table stackByte needlePos + let patByte := needle.getUtf8Byte! needlePos + if stackByte != patByte then + let nextStackPos := s.findNextPos currStackPos h1 |>.offset + let res := .rejected (s.pos! startPos) (s.pos! nextStackPos) + have hiter := by + left + exists nextStackPos + have haux := lt_offset_findNextPos h1 + simp only [pos_lt_eq, proper.injEq, true_and, exists_and_left, exists_eq', and_true, + nextStackPos] + constructor + · simp [String.Pos.le_iff] at h haux ⊢ + omega + · apply Pos.IsValidForSlice.le_utf8ByteSize + apply Pos.isValidForSlice + ⟨.yield ⟨.proper needle table nextStackPos needlePos⟩ res, hiter⟩ + else + let needlePos := needlePos.inc + if needlePos == needle.utf8ByteSize then + let nextStackPos := currStackPos.inc + let res := .matched (s.pos! startPos) (s.pos! nextStackPos) + have hiter := by + left + exists nextStackPos + simp only [pos_lt_eq, Pos.byteIdx_inc, proper.injEq, true_and, exists_and_left, + exists_eq', and_true, nextStackPos] + constructor + · simp [String.Pos.le_iff] at h ⊢ + omega + · simp [String.Pos.le_iff] at h1 ⊢ + omega + ⟨.yield ⟨.proper needle table nextStackPos 0⟩ res, hiter⟩ + else + have hinv := by + simp [String.Pos.le_iff] at h ⊢ + omega + findNext startPos currStackPos.inc needlePos hinv + else + if startPos != s.utf8ByteSize then + let res := .rejected (s.pos! startPos) (s.pos! currStackPos) + ⟨.yield ⟨.atEnd⟩ res, by simp⟩ + else + ⟨.done, by simp⟩ + termination_by s.utf8ByteSize.byteIdx - currStackPos.byteIdx + decreasing_by + simp at h1 ⊢ + omega + + findNext stackPos stackPos needlePos (by simp) + | .atEnd => pure ⟨.done, by simp⟩ + +private def toPair : ForwardSliceSearcher s → (Nat × Nat) + | .empty pos => (1, s.utf8ByteSize.byteIdx - pos.offset.byteIdx) + | .proper _ _ sp _ => (1, s.utf8ByteSize.byteIdx - sp.byteIdx) + | .atEnd => (0, 0) + +private instance : WellFoundedRelation (ForwardSliceSearcher s) where + rel s1 s2 := Prod.Lex (· < ·) (· < ·) s1.toPair s2.toPair + wf := by + apply InvImage.wf + apply (Prod.lex _ _).wf + +private def finitenessRelation : + Std.Iterators.FinitenessRelation (ForwardSliceSearcher s) Id where + rel := InvImage WellFoundedRelation.rel (fun it => it.internalState) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + simp only [Std.Iterators.IterM.IsPlausibleStep, Std.Iterators.Iterator.IsPlausibleStep] at h' + split at h' + · next heq => + rw [heq] + rcases h' with ⟨np, h1', h2'⟩ | h' + · rw [h2'] + apply Prod.Lex.right' + · simp + · have haux := np.isValidForSlice.le_utf8ByteSize + simp [String.Pos.le_iff] at h1' haux ⊢ + omega + · apply Prod.Lex.left + simp [h'] + · next heq => + rw [heq] + rcases h' with ⟨np, sp, h1', h2', h3'⟩ | h' + · rw [h3'] + apply Prod.Lex.right' + · simp + · simp [String.Pos.le_iff] at h1' h2' ⊢ + omega + · apply Prod.Lex.left + simp [h'] + · contradiction + · cases h' + · cases h + +@[no_expose] +instance : Std.Iterators.Finite (ForwardSliceSearcher s) Id := + .of_finitenessRelation finitenessRelation + +instance : Std.Iterators.IteratorLoop (ForwardSliceSearcher s) Id Id := + .defaultImplementation + +instance : ToForwardSearcher Slice ForwardSliceSearcher where + toSearcher := iter + +@[inline] +def startsWith (s : Slice) (pat : Slice) : Bool := + if h : pat.utf8ByteSize ≤ s.utf8ByteSize then + have hs := by + simp [Pos.le_iff] at h ⊢ + omega + have hp := by + simp [Pos.le_iff] + Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.utf8ByteSize hs hp + else + false + +@[inline] +def dropPrefix? (s : Slice) (pat : Slice) : Option Slice := + if startsWith s pat then + some <| s.replaceStart <| s.pos! <| s.startPos.offset + pat.utf8ByteSize + else + none + +instance : ForwardPattern Slice where + startsWith := startsWith + dropPrefix? := dropPrefix? + +instance : ToForwardSearcher String ForwardSliceSearcher where + toSearcher slice pat := iter slice pat.toSlice + +instance : ForwardPattern String where + startsWith s pat := startsWith s pat.toSlice + dropPrefix? s pat := dropPrefix? s pat.toSlice + +end ForwardSliceSearcher + +namespace BackwardSliceSearcher + +@[inline] +def endsWith (s : Slice) (pat : Slice) : Bool := + if h : pat.utf8ByteSize ≤ s.utf8ByteSize then + let sStart := s.endPos.offset - pat.utf8ByteSize + let patStart := pat.startPos.offset + have hs := by + simp [sStart, Pos.le_iff] at h ⊢ + omega + have hp := by + simp [patStart, Pos.le_iff] at h ⊢ + Internal.memcmp s pat sStart patStart pat.utf8ByteSize hs hp + else + false + +@[inline] +def dropSuffix? (s : Slice) (pat : Slice) : Option Slice := + if endsWith s pat then + some <| s.replaceEnd <| s.pos! <| s.endPos.offset - pat.utf8ByteSize + else + none + +instance : BackwardPattern Slice where + endsWith := endsWith + dropSuffix? := dropSuffix? + +instance : BackwardPattern String where + endsWith s pat := endsWith s pat.toSlice + dropSuffix? s pat := dropSuffix? s pat.toSlice + +end BackwardSliceSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Slice.lean b/src/Init/Data/String/Slice.lean new file mode 100644 index 0000000000..c4b0ee73cd --- /dev/null +++ b/src/Init/Data/String/Slice.lean @@ -0,0 +1,1201 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Init.Data.String.Pattern +public import Init.Data.Iterators.Consumers.Monadic.Collect +public import Init.Data.Ord.Basic +import Init.Data.Iterators.Combinators.FilterMap + +set_option doc.verso true + +/-! +This module defines the programming API for {name}`String.Slice`. The API mostly consists of +functionality for searching for various kinds of pattern matches in slices to iterate over them, +provide subslices according to matches etc. The key design principles behind this module are: +- Instead of providing one function per kind of pattern the API is generic over various kinds of + patterns. Thus it only provides e.g. one kind of function for looking for the position of the + first occurence of a pattern. Currently the supported patterns are: + - {name}`Char` + - {lean}`Char → Bool` + - {name}`String` and {name}`String.Slice` (partially: doing non trivial searches backwards is not + supported yet) +- Whenever a slice gets mutated a new slice is returned to allow for repeated chaining of functions + with minimal allocations. If necessary the slice can ultimately be converted back to + {name}`String` using {name}`String.Slice.copy` +- Instead of allocating intermediate collections the operations that iterate over slices in various + ways (characters, positions etc.) return iterators that can be collected into other collections if + necessary. +- When sensible the API provides functionality for searching both in a forward and backward manner +-/ + +public section + +namespace String.Slice + +open Pattern + +/-- +Checks whether a slice is empty. + +Empty slices have {name}`utf8ByteSize` {lean}`0`. + +Examples: + * {lean}`"".toSlice.isEmpty = true` + * {lean}`" ".toSlice.isEmpty = false` +-/ +@[inline] +def isEmpty (s : Slice) : Bool := s.utf8ByteSize == 0 + +/-- +Checks whether {name}`s1` and {name}`s2` represent the same string, even if they are slices of +different base strings or different slices within the same string. + +The implementation is an efficient equivalent of {lean}`s1.copy == s2.copy` +-/ +def beq (s1 s2 : Slice) : Bool := + if h : s1.utf8ByteSize = s2.utf8ByteSize then + have h1 := by simp [h, String.Pos.le_iff] + have h2 := by simp [h, String.Pos.le_iff] + Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.utf8ByteSize h1 h2 + else + false + +instance : BEq Slice where + beq := beq + +@[extern "lean_slice_hash"] +opaque hash (s : @& Slice) : UInt64 + +instance : Hashable Slice where + hash := hash + +instance : LT Slice where + lt x y := x.copy < y.copy + +@[extern "lean_slice_dec_lt"] +instance (x y : @& Slice) : Decidable (x < y) := + inferInstanceAs (Decidable (x.copy < y.copy)) + +instance : Ord Slice where + compare x y := compareOfLessAndBEq x y + +instance : LE Slice where + le x y := ¬x < y + +instance : DecidableLE Slice := + fun x y => inferInstanceAs (Decidable (¬x < y)) + +section ForwardPatternUsers + +variable {ρ : Type} {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [∀ s, Std.Iterators.Finite (σ s) Id] +variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id] + +/-- +Checks whether the slice ({name}`s`) begins with the pattern ({name}`pat`). + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.startsWith "red" = true` + * {lean}`"red green blue".toSlice.startsWith "green" = false` + * {lean}`"red green blue".toSlice.startsWith "" = true` + * {lean}`"red green blue".toSlice.startsWith 'r' = true` + * {lean}`"red green blue".toSlice.startsWith Char.isLower = true` +-/ +@[inline] +def startsWith [ForwardPattern ρ] (s : Slice) (pat : ρ) : Bool := + ForwardPattern.startsWith s pat + +inductive SplitIterator (ρ : Type) [ToForwardSearcher ρ σ] where + | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s)) + | atEnd +deriving Inhabited + +namespace SplitIterator + +variable [ToForwardSearcher ρ σ] + +instance [Pure m] : Std.Iterators.Iterator (SplitIterator ρ) m Slice where + IsPlausibleStep := fun _ _ => True + step := fun ⟨iter⟩ => + match iter with + | .operating s currPos searcher => + match Internal.nextMatch searcher with + | some (searcher, startPos, endPos) => + let slice := s.replaceStartEnd! currPos startPos + let nextIt := ⟨.operating s endPos searcher⟩ + pure ⟨.yield nextIt slice, by simp⟩ + | none => + let slice := s.replaceStart currPos + pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩ + | .atEnd => pure ⟨.done, by simp⟩ + +-- TODO: Finiteness after we have a notion of lawful searcher + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (SplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial (SplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (SplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (SplitIterator ρ) m n := + .defaultImplementation + +end SplitIterator + +/-- +Splits a slice at each subslice that matches the pattern {name}`pat`. + +The subslices that matched the pattern are not included in any of the resulting subslices. If +multiple subslices in a row match the pattern, the resulting list will contain empty strings. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`("coffee tea water".toSlice.split Char.isWhitespace).allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]` + * {lean}`("coffee tea water".toSlice.split ' ').allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice]` + * {lean}`("coffee tea water".toSlice.split " tea ").allowNontermination.toList == ["coffee".toSlice, "water".toSlice]` + * {lean}`("ababababa".toSlice.split "aba").allowNontermination.toList == ["coffee".toSlice, "water".toSlice]` + * {lean}`("baaab".toSlice.split "aa").allowNontermination.toList == ["b".toSlice, "ab".toSlice]` +-/ +@[specialize pat] +def split [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Std.Iter (α := SplitIterator ρ) Slice := + { internalState := .operating s s.startPos (ToForwardSearcher.toSearcher s pat) } + +inductive SplitInclusiveIterator (ρ : Type) [ToForwardSearcher ρ σ] where + | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s)) + | atEnd +deriving Inhabited + +namespace SplitInclusiveIterator + +variable [ToForwardSearcher ρ σ] + +instance [Pure m] : Std.Iterators.Iterator (SplitInclusiveIterator ρ) m Slice where + IsPlausibleStep := fun _ _ => True + step := fun ⟨iter⟩ => + match iter with + | .operating s currPos searcher => + match Internal.nextMatch searcher with + | some (searcher, _, endPos) => + let slice := s.replaceStartEnd! currPos endPos + let nextIt := ⟨.operating s endPos searcher⟩ + pure ⟨.yield nextIt slice, by simp⟩ + | none => + if currPos != s.endPos then + let slice := s.replaceStart currPos + pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩ + else + pure ⟨.done, by simp⟩ + | .atEnd => pure ⟨.done, by simp⟩ + +-- TODO: Finiteness after we have a notion of lawful searcher + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorCollect (SplitInclusiveIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorCollectPartial (SplitInclusiveIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorLoop (SplitInclusiveIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorLoopPartial (SplitInclusiveIterator ρ) m n := + .defaultImplementation + +end SplitInclusiveIterator + +/-- +Splits a slice at each subslice that matches the pattern {name}`pat`. Unlike {name}`split` the +matched subslices are included at the end of each subslice. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`("coffee tea water".toSlice.splitInclusive Char.isWhitespace).allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]` + * {lean}`("coffee tea water".toSlice.splitInclusive ' ').allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice]` + * {lean}`("coffee tea water".toSlice.splitInclusive " tea ").allowNontermination.toList == ["coffee tea ".toSlice, "water".toSlice]` + * {lean}`("baaab".toSlice.splitInclusive "aa").allowNontermination.toList == ["baa".toSlice, "ab".toSlice]` +-/ +@[specialize pat] +def splitInclusive [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : + Std.Iter (α := SplitInclusiveIterator ρ) Slice := + { internalState := .operating s s.startPos (ToForwardSearcher.toSearcher s pat) } + +/-- +If {name}`pat` matches a prefix of {name}`s`, returns the remainder. Returns {name}`none` otherwise. + +Use {name (scope := "Init.Data.String.Slice")}`String.Slice.dropPrefix` to return the slice +unchanged when {name}`pat` does not match a prefix. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.dropPrefix? "red " == some "green blue".toSlice` + * {lean}`"red green blue".toSlice.dropPrefix? "reed " == none` + * {lean}`"red green blue".toSlice.dropPrefix? 'r' == some "ed green blue".toSlice` + * {lean}`"red green blue".toSlice.dropPrefix? Char.isLower == some "ed green blue".toSlice` +-/ +@[inline] +def dropPrefix? [ForwardPattern ρ] (s : Slice) (pat : ρ) : Option Slice := + ForwardPattern.dropPrefix? s pat + +/-- +If {name}`pat` matches a prefix of {name}`s`, returns the remainder. Returns {name}`s` unmodified +otherwise. + +Use {name}`String.Slice.dropPrefix?` to return {name}`none` when {name}`pat` does not match a prefix. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.dropPrefix "red " == "green blue".toSlice` + * {lean}`"red green blue".toSlice.dropPrefix "reed " == "red green blue".toSlice` + * {lean}`"red green blue".toSlice.dropPrefix 'r' == "ed green blue".toSlice` + * {lean}`"red green blue".toSlice.dropPrefix Char.isLower == "ed green blue".toSlice` +-/ +@[specialize pat] +def dropPrefix [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + dropPrefix? s pat |>.getD s + +/-- +Removes the specified number of characters (Unicode code points) from the start of the slice. + +If {name}`n` is greater than the amount of characters in {name}`s`, returns an empty slice. + +Examples: + * {lean}`"red green blue".toSlice.drop 4 == "green blue".toSlice` + * {lean}`"red green blue".toSlice.drop 10 == "blue".toSlice` + * {lean}`"red green blue".toSlice.drop 50 == "".toSlice` +-/ +@[inline] +def drop (s : Slice) (n : Nat) : Slice := + s.replaceStart (s.startPos.nextn n) + +/-- +Creates a new slice that contains the longest prefix of {name}`s` for which {name}`pat` matched +(potentially repeatedly). + +Examples: + * {lean}`"red green blue".toSlice.dropWhile Char.isLower == " green blue".toSlice` + * {lean}`"red green blue".toSlice.dropWhile 'r' == "ed green blue".toSlice` + * {lean}`"red red green blue".toSlice.dropWhile "red " == "green blue".toSlice` + * {lean}`"red green blue".toSlice.dropWhile (fun (_ : Char) => true) == "".toSlice` +-/ +@[inline] +partial def dropWhile [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + go s +where + @[specialize pat] + go (s : Slice) : Slice := + if let some nextS := dropPrefix? s pat then + -- TODO: need lawful patterns to show this terminates + if s.startInclusive.offset < nextS.startInclusive.offset then + go nextS + else + s + else + s + +/-- +Removes leading whitespace from a slice by moving its start position to the first non-whitespace +character, or to its end position if there is no non-whitespace character. + +“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`. + +Examples: + * {lean}`"abc".toSlice.trimAsciiStart == "abc".toSlice` + * {lean}`" abc".toSlice.trimAsciiStart == "abc".toSlice` + * {lean}`"abc \t ".toSlice.trimAsciiStart == "abc \t ".toSlice` + * {lean}`" abc ".toSlice.trimAsciiStart == "abc ".toSlice` + * {lean}`"abc\ndef\n".toSlice.trimAsciiStart == "abc\ndef\n".toSlice` +-/ +@[inline] +def trimAsciiStart (s : Slice) : Slice := + -- If we want to optimize this can be pushed further by specialising for ASCII + dropWhile s Char.isWhitespace + +/-- +Creates a new slice that contains the first {name}`n` characters (Unicode code points) of {name}`s`. + +If {name}`n` is greater than the amount of characters in {name}`s`, returns {name}`s`. + +Examples: + * {lean}`"red green blue".toSlice.take 3 == "red".toSlice` + * {lean}`"red green blue".toSlice.take 1 == "r".toSlice` + * {lean}`"red green blue".toSlice.take 0 == "".toSlice` + * {lean}`"red green blue".toSlice.take 100 == "red green blue".toSlice` +-/ +@[inline] +def take (s : Slice) (n : Nat) : Slice := + s.replaceEnd (s.startPos.nextn n) + +/-- +Creates a new slice that contains the longest prefix of {name}`s` for which {name}`pat` matched +(potentially repeatedly). + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.takeWhile Char.isLower == "red".toSlice` + * {lean}`"red green blue".toSlice.takeWhile 'r' == "r".toSlice` + * {lean}`"red red green blue".toSlice.takeWhile "red " == "red red ".toSlice` + * {lean}`"red green blue".toSlice.takeWhile (fun (_ : Char) => true) == "red green blue".toSlice` +-/ +@[inline] +partial def takeWhile [ForwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + go s +where + @[specialize pat] + go (curr : Slice) : Slice := + if let some nextCurr := dropPrefix? curr pat then + if curr.startInclusive.offset < nextCurr.startInclusive.offset then + -- TODO: need lawful patterns to show this terminates + go nextCurr + else + s.replaceEnd <| s.pos! <| curr.startInclusive.offset + else + s.replaceEnd <| s.pos! <| curr.startInclusive.offset + +/-- +Finds the position of the first match of the pattern {name}`pat` in a slice {name}`true`. If there +is no match {name}`none` is returned. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`("coffee tea water".toSlice.find? Char.isWhitespace).map (·.get!) == some ' '` + * {lean}`"tea".toSlice.find? (fun (c : Char) => c == 'X') == none` + * {lean}`("coffee tea water".toSlice.find? "tea").map (·.get!) == some 't'` +-/ +@[specialize pat] +def find? [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Option s.Pos := + let searcher := ToForwardSearcher.toSearcher s pat + match Internal.nextMatch searcher with + | some (_, startPos, _) => some startPos + | none => none + +/-- +Checks whether a slice has a match of the pattern {name}`pat` anywhere. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"coffee tea water".toSlice.contains Char.isWhitespace = true` + * {lean}`"tea".toSlice.contains (fun (c : Char) => c == 'X') = false` + * {lean}`"coffee tea water".toSlice.contains "tea" = true` +-/ +@[specialize pat] +def contains [ToForwardSearcher ρ σ] (s : Slice) (pat : ρ) : Bool := + let searcher := ToForwardSearcher.toSearcher s pat + Internal.nextMatch searcher |>.isSome + +/-- +Checks whether a slice only consists of matches of the pattern {name}`pat` anywhere. + +Short-circuits at the first pattern mis-match. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"brown".toSlice.all Char.isLower = true` + * {lean}`"brown and orange".toSlice.all Char.isLower = false` + * {lean}`"aaaaaa".toSlice.all 'a' = true` + * {lean}`"aaaaaa".toSlice.all "aa" = true` +-/ +@[inline] +def all [ForwardPattern ρ] (s : Slice) (pat : ρ) : Bool := + s.dropWhile pat |>.isEmpty + +end ForwardPatternUsers + +section BackwardPatternUsers + +variable {σ : Slice → Type} +variable [∀ s, Std.Iterators.Iterator (σ s) Id (SearchStep s)] +variable [∀ s, Std.Iterators.Finite (σ s) Id] +variable [∀ s, Std.Iterators.IteratorLoop (σ s) Id Id] + +/-- +Checks whether the slice ({name}`s`) ends with the pattern ({name}`pat`). + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.endsWith "blue" = true` + * {lean}`"red green blue".toSlice.endsWith "green" = false` + * {lean}`"red green blue".toSlice.endsWith "" = true` + * {lean}`"red green blue".toSlice.endsWith 'e' = true` + * {lean}`"red green blue".toSlice.endsWith Char.isLower = true` +-/ +@[inline] +def endsWith [BackwardPattern ρ] (s : Slice) (pat : ρ) : Bool := + BackwardPattern.endsWith s pat + +inductive RevSplitIterator (ρ : Type) [ToBackwardSearcher ρ σ] where + | operating (s : Slice) (currPos : s.Pos) (searcher : Std.Iter (α := σ s) (SearchStep s)) + | atEnd +deriving Inhabited + +namespace RevSplitIterator + +variable [ToBackwardSearcher ρ σ] + +instance [Pure m] : Std.Iterators.Iterator (RevSplitIterator ρ) m Slice where + IsPlausibleStep := fun _ _ => True + step := fun ⟨iter⟩ => + match iter with + | .operating s currPos searcher => + match Internal.nextMatch searcher with + | some (searcher, startPos, endPos) => + let slice := s.replaceStartEnd! endPos currPos + let nextIt := ⟨.operating s startPos searcher⟩ + pure ⟨.yield nextIt slice, by simp⟩ + | none => + if currPos ≠ s.startPos then + let slice := s.replaceEnd currPos + pure ⟨.yield ⟨.atEnd⟩ slice, by simp⟩ + else + pure ⟨.done, by simp⟩ + | .atEnd => pure ⟨.done, by simp⟩ + +-- TODO: Finiteness after we have a notion of lawful searcher + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (RevSplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorCollectPartial (RevSplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (RevSplitIterator ρ) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (RevSplitIterator ρ) m n := + .defaultImplementation + +end RevSplitIterator + +/-- +Splits a slice at each subslice that matches the pattern {name}`pat`, starting from the end of the +slice and traversing towards the start. + +The subslices that matched the pattern are not included in any of the resulting subslices. If +multiple subslices in a row match the pattern, the resulting list will contain empty slices. + +This function is generic over all currently supported patterns except +{name}`String`/{name}`String.Slice`. + +Examples: + * {lean}`("coffee tea water".toSlice.revSplit Char.isWhitespace).allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice]` + * {lean}`("coffee tea water".toSlice.revSplit ' ').allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice]` +-/ +@[specialize pat] +def revSplit [ToBackwardSearcher ρ σ] (s : Slice) (pat : ρ) : + Std.Iter (α := RevSplitIterator ρ) Slice := + { internalState := .operating s s.endPos (ToBackwardSearcher.toSearcher s pat) } + +/-- +If {name}`pat` matches a suffix of {name}`s`, returns the remainder. Returns {name}`none` otherwise. + +Use {name (scope := "Init.Data.String.Slice")}`String.Slice.dropSuffix` to return the slice +unchanged when {name}`pat` does not match a prefix. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.dropSuffix? " blue" == some "red green".toSlice` + * {lean}`"red green blue".toSlice.dropSuffix? "bluu " == none` + * {lean}`"red green blue".toSlice.dropSuffix? 'e' == some "red green blu".toSlice` + * {lean}`"red green blue".toSlice.dropSuffix? Char.isLower == some "red green blu".toSlice` +-/ +@[inline] +def dropSuffix? [BackwardPattern ρ] (s : Slice) (pat : ρ) : Option Slice := + BackwardPattern.dropSuffix? s pat + +/-- +If {name}`pat` matches a suffix of {name}`s`, returns the remainder. Returns {name}`s` unmodified +otherwise. + +Use {name}`String.Slice.dropSuffix?` to return {name}`none` when {name}`pat` does not match a +prefix. + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.dropSuffix " blue" == "red green".toSlice` + * {lean}`"red green blue".toSlice.dropSuffix "bluu " == "red green blue".toSlice` + * {lean}`"red green blue".toSlice.dropSuffix 'e' == "red green blu".toSlice` + * {lean}`"red green blue".toSlice.dropSuffix Char.isLower == "red green blu".toSlice` +-/ +@[specialize pat] +def dropSuffix [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + dropSuffix? s pat |>.getD s + +/-- +Removes the specified number of characters (Unicode code points) from the end of the slice. + +If {name}`n` is greater than the amount of characters in {name}`s`, returns an empty slice. + +Examples: + * {lean}`"red green blue".toSlice.dropEnd 5 == "red green".toSlice` + * {lean}`"red green blue".toSlice.dropEnd 11 == "red".toSlice` + * {lean}`"red green blue".toSlice.dropEnd 50 == "".toSlice` +-/ +@[inline] +def dropEnd (s : Slice) (n : Nat) : Slice := + s.replaceEnd (s.endPos.prevn n) + +/-- +Creates a new slice that contains the longest suffix of {name}`s` for which {name}`pat` matched +(potentially repeatedly). + +Examples: + * {lean}`"red green blue".toSlice.dropEndWhile Char.isLower == "red green ".toSlice` + * {lean}`"red green blue".toSlice.dropEndWhile 'e' == "red green blu".toSlice` + * {lean}`"red green blue".toSlice.dropEndWhile (fun (_ : Char) => true) == "".toSlice` +-/ +@[inline] +partial def dropEndWhile [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + go s +where + @[specialize pat] + go (s : Slice) : Slice := + if let some nextS := dropSuffix? s pat then + -- TODO: need lawful patterns to show this terminates + if nextS.endExclusive.offset < s.endExclusive.offset then + go nextS + else + s + else + s + +/-- +Removes trailing whitespace from a slice by moving its start position to the first non-whitespace +character, or to its end position if there is no non-whitespace character. + +“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`. + +Examples: + * {lean}`"abc".toSlice.trimAsciiEnd == "abc".toSlice` + * {lean}`" abc".toSlice.trimAsciiEnd == " abc".toSlice` + * {lean}`"abc \t ".toSlice.trimAsciiEnd == "abc".toSlice` + * {lean}`" abc ".toSlice.trimAsciiEnd == " abc".toSlice` + * {lean}`"abc\ndef\n".toSlice.trimAsciiEnd == "abc\ndef".toSlice` +-/ +@[inline] +def trimAsciiEnd (s : Slice) : Slice := + -- If we want to optimize this can be pushed further by specialising for ASCII + dropEndWhile s Char.isWhitespace + +/-- +Creates a new slice that contains the last {name}`n` characters (Unicode code points) of {name}`s`. + +If {name}`n` is greater than the amount of characters in {name}`s`, returns {name}`s`. + +Examples: + * {lean}`"red green blue".toSlice.takeEnd 4 == "blue".toSlice` + * {lean}`"red green blue".toSlice.takeEnd 1 == "e".toSlice` + * {lean}`"red green blue".toSlice.takeEnd 0 == "".toSlice` + * {lean}`"red green blue".toSlice.takeEnd 100 == "red green blue".toSlice` +-/ +@[inline] +def takeEnd (s : Slice) (n : Nat) : Slice := + s.replaceStart (s.endPos.prevn n) + +/-- +Creates a new slice that contains the suffix prefix of {name}`s` for which {name}`pat` matched +(potentially repeatedly). + +This function is generic over all currently supported patterns. + +Examples: + * {lean}`"red green blue".toSlice.takeEndWhile Char.isLower == "blue".toSlice` + * {lean}`"red green blue".toSlice.takeEndWhile 'e' == "e".toSlice` + * {lean}`"red green blue".toSlice.takeEndWhile (fun (_ : Char) => true) == "red green blue".toSlice` +-/ +@[inline] +partial def takeEndWhile [BackwardPattern ρ] (s : Slice) (pat : ρ) : Slice := + go s +where + @[specialize pat] + go (curr : Slice) : Slice := + if let some nextCurr := dropSuffix? curr pat then + if nextCurr.endExclusive.offset < curr.endExclusive.offset then + -- TODO: need lawful patterns to show this terminates + go nextCurr + else + s.replaceStart <| s.pos! <| curr.endExclusive.offset + else + s.replaceStart <| s.pos! <| curr.endExclusive.offset + +/-- +Finds the position of the first match of the pattern {name}`pat` in a slice {name}`true`, starting +from the end of the slice and traversing towards the start. If there is no match {name}`none` is +returned. + +This function is generic over all currently supported patterns except +{name}`String`/{name}`String.Slice`. + +Examples: + * {lean}`("coffee tea water".toSlice.find? Char.isWhitespace).map (·.get!) == some ' '` + * {lean}`"tea".toSlice.find? (fun (c : Char) => c == 'X') == none` + * {lean}`("coffee tea water".toSlice.find? "tea").map (·.get!) == some 't'` +-/ +@[specialize pat] +def revFind? [ToBackwardSearcher ρ σ] (s : Slice) (pat : ρ) : Option s.Pos := + let searcher := ToBackwardSearcher.toSearcher s pat + match Internal.nextMatch searcher with + | some (_, startPos, _) => some startPos + | none => none + +end BackwardPatternUsers + +/-- +Removes leading and trailing whitespace from a slice. + +“Whitespace” is defined as characters for which {name}`Char.isWhitespace` returns {name}`true`. + +Examples: + * {lean}`"abc".toSlice.trimAscii == "abc".toSlice` + * {lean}`" abc".toSlice.trimAscii == "abc".toSlice` + * {lean}`"abc \t ".toSlice.trimAscii == "abc".toSlice` + * {lean}`" abc ".toSlice.trimAscii == "abc".toSlice` + * {lean}`"abc\ndef\n".toSlice.trimAscii == "abc\ndef".toSlice` +-/ +def trimAscii (s : Slice) : Slice := + s.trimAsciiStart.trimAsciiEnd + +/-- +Checks whether {lean}`s1 == s2` if ASCII upper/lowercase are ignored. +-/ +def eqIgnoreAsciiCase (s1 s2 : Slice) : Bool := + s1.utf8ByteSize == s2.utf8ByteSize && go s1 s1.startPos.offset s2 s2.startPos.offset +where + go (s1 : Slice) (s1Curr : String.Pos) (s2 : Slice) (s2Curr : String.Pos) : Bool := + if h : s1Curr < s1.utf8ByteSize ∧ s2Curr < s2.utf8ByteSize then + let c1 := (s1.getUtf8Byte s1Curr h.left).toAsciiLower + let c2 := (s2.getUtf8Byte s2Curr h.right).toAsciiLower + if c1 == c2 then + go s1 s1Curr.inc s2 s2Curr.inc + else + false + else + s1Curr == s1.utf8ByteSize && s2Curr == s2.utf8ByteSize + termination_by s1.endPos.offset.byteIdx - s1Curr.byteIdx + decreasing_by + simp at h ⊢ + omega + +structure PosIterator (s : Slice) where + currPos : s.Pos +deriving Inhabited + +set_option doc.verso false +/-- +Creates and iterator over all valid positions within {name}`s`. + +Examples + * {lean}`("abc".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', 'c']` + * {lean}`("abc".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2]` + * {lean}`("ab∀c".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', '∀', 'c']` + * {lean}`("ab∀c".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2, 5]` +-/ +def positions (s : Slice) : Std.Iter (α := PosIterator s) { p : s.Pos // p ≠ s.endPos } := + { internalState := { currPos := s.startPos }} + +set_option doc.verso true + +namespace PosIterator + +instance [Pure m] : + Std.Iterators.Iterator (PosIterator s) m { p : s.Pos // p ≠ s.endPos } where + IsPlausibleStep it + | .yield it' out => + ∃ h : it.internalState.currPos ≠ s.endPos, + it'.internalState.currPos = it.internalState.currPos.next h ∧ + it.internalState.currPos = out + | .skip _ => False + | .done => it.internalState.currPos = s.endPos + step := fun ⟨⟨currPos⟩⟩ => + if h : currPos = s.endPos then + pure ⟨.done, by simp [h]⟩ + else + pure ⟨.yield ⟨⟨currPos.next h⟩⟩ ⟨currPos, h⟩, by simp [h]⟩ + +private def finitenessRelation [Pure m] : + Std.Iterators.FinitenessRelation (PosIterator s) m where + rel := InvImage WellFoundedRelation.rel + (fun it => s.utf8ByteSize.byteIdx - it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨h1, h2, _⟩ := h' + have h3 := Char.utf8Size_pos (it.internalState.currPos.get h1) + have h4 := it.internalState.currPos.isValidForSlice.le_utf8ByteSize + simp [Pos.ext_iff, String.Pos.ext_iff, Pos.le_iff] at h1 h2 h4 + omega + · cases h' + · cases h + +@[no_expose] +instance [Pure m] : Std.Iterators.Finite (PosIterator s) m := + .of_finitenessRelation finitenessRelation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (PosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial (PosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (PosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (PosIterator s) m n := + .defaultImplementation + +docs_to_verso positions + +end PosIterator + +/-- +Creates and iterator over all characters (Unicode code points) in {name}`s`. + +Examples: + * {lean}`"abc".toSlice.chars.toList = ['a', 'b', 'c']` + * {lean}`"ab∀c".toSlice.chars.toList = ['a', 'b', '∀', 'c']` +-/ +@[expose, inline] +def chars (s : Slice) := + Std.Iterators.Iter.map (fun ⟨pos, h⟩ => pos.get h) (positions s) + +structure RevPosIterator (s : Slice) where + currPos : s.Pos +deriving Inhabited + +set_option doc.verso false +/-- +Creates and iterator over all valid positions within {name}`s`, starting from the last valid +position and iterating towards the first one. + +Examples + * {lean}`("abc".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', 'b', 'a']` + * {lean}`("abc".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [2, 1, 0]` + * {lean}`("ab∀c".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', '∀', 'b', 'a']` + * {lean}`("ab∀c".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [5, 2, 1, 0]` +-/ +def revPositions (s : Slice) : Std.Iter (α := RevPosIterator s) { p : s.Pos // p ≠ s.endPos } := + { internalState := { currPos := s.endPos }} + +set_option doc.verso true + +namespace RevPosIterator + +instance [Pure m] : + Std.Iterators.Iterator (RevPosIterator s) m { p : s.Pos // p ≠ s.endPos } where + IsPlausibleStep it + | .yield it' out => + ∃ h : it.internalState.currPos ≠ s.startPos, + it'.internalState.currPos = it.internalState.currPos.prev h ∧ + it.internalState.currPos.prev h = out + | .skip _ => False + | .done => it.internalState.currPos = s.startPos + step := fun ⟨⟨currPos⟩⟩ => + if h : currPos = s.startPos then + pure ⟨.done, by simp [h]⟩ + else + let prevPos := currPos.prev h + pure ⟨.yield ⟨⟨prevPos⟩⟩ ⟨prevPos, Pos.prev_ne_endPos⟩, by simp [h, prevPos]⟩ + +private def finitenessRelation [Pure m] : + Std.Iterators.FinitenessRelation (RevPosIterator s) m where + rel := InvImage WellFoundedRelation.rel + (fun it => it.internalState.currPos.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨h1, h2, _⟩ := h' + have h3 := Pos.offset_prev_lt_offset (h := h1) + simp [Pos.ext_iff, String.Pos.ext_iff] at h2 h3 + omega + · cases h' + · cases h + +@[no_expose] +instance [Pure m] : Std.Iterators.Finite (RevPosIterator s) m := + .of_finitenessRelation finitenessRelation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect (RevPosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + Std.Iterators.IteratorCollectPartial (RevPosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop (RevPosIterator s) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial (RevPosIterator s) m n := + .defaultImplementation + +docs_to_verso revPositions + +end RevPosIterator + +/-- +Creates and iterator over all characters (Unicode code points) in {name}`s`, starting from the end +of the slice and iterating towards the start. + +Example: + * {lean}`"abc".toSlice.revChars.toList = ['c', 'b', 'a']` + * {lean}`"ab∀c".toSlice.revChars.toList = ['c', '∀', 'b', 'a']` +-/ +@[expose, inline] +def revChars (s : Slice) := + Std.Iterators.Iter.map (fun ⟨pos, h⟩ => pos.get h) (revPositions s) + +structure ByteIterator where + s : Slice + offset : String.Pos +deriving Inhabited + +set_option doc.verso false +/-- +Creates and iterator over all bytes in {name}`s`. + +Examples: + * {lean}`"abc".toSlice.bytes.toList = [97, 98, 99]` + * {lean}`"ab∀c".toSlice.bytes.toList = [97, 98, 226, 136, 128, 99]` +-/ +def bytes (s : Slice) : Std.Iter (α := ByteIterator) UInt8 := + { internalState := { s, offset := s.startPos.offset }} + +set_option doc.verso true + +namespace ByteIterator + +instance [Pure m] : Std.Iterators.Iterator ByteIterator m UInt8 where + IsPlausibleStep it + | .yield it' out => + ∃ h1 : it.internalState.offset < it.internalState.s.utf8ByteSize, + it.internalState.s = it'.internalState.s ∧ + it'.internalState.offset = it.internalState.offset.inc ∧ + it.internalState.s.getUtf8Byte it.internalState.offset h1 = out + | .skip _ => False + | .done => ¬ it.internalState.offset < it.internalState.s.utf8ByteSize + step := fun ⟨s, offset⟩ => + if h : offset < s.utf8ByteSize then + pure ⟨.yield ⟨s, offset.inc⟩ (s.getUtf8Byte offset h), by simp [h]⟩ + else + pure ⟨.done, by simp [h]⟩ + +private def finitenessRelation [Pure m] : + Std.Iterators.FinitenessRelation (ByteIterator) m where + rel := InvImage WellFoundedRelation.rel + (fun it => it.internalState.s.utf8ByteSize.byteIdx - it.internalState.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨h1, h2, h3, h4⟩ := h' + clear h4 + generalize it'.internalState.s = s at * + cases h2 + simp [String.Pos.ext_iff] at h1 h3 + omega + · cases h' + · cases h + +@[no_expose] +instance [Pure m] : Std.Iterators.Finite ByteIterator m := + .of_finitenessRelation finitenessRelation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect ByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial ByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop ByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial ByteIterator m n := + .defaultImplementation + +docs_to_verso bytes + +end ByteIterator + +structure RevByteIterator where + s : Slice + offset : String.Pos + hinv : offset ≤ s.utf8ByteSize + +set_option doc.verso false +/-- +Creates and iterator over all bytes in {name}`s`, starting from the last one and iterating towards +the first one. + +Examples: + * {lean}`"abc".toSlice.revBytes.toList = [99, 98, 97]` + * {lean}`"ab∀c".toSlice.revBytes.toList = [99, 128, 136, 226, 98, 97]` +-/ +def revBytes (s : Slice) : Std.Iter (α := RevByteIterator) UInt8 := + { internalState := { s, offset := s.endPos.offset, hinv := by simp }} + +set_option doc.verso true + +instance : Inhabited RevByteIterator where + default := + let s := default + { s := s, offset := s.endPos.offset, hinv := by simp} + +namespace RevByteIterator + +instance [Pure m] : Std.Iterators.Iterator RevByteIterator m UInt8 where + IsPlausibleStep it + | .yield it' out => + ∃ h1 : it.internalState.offset.dec < it.internalState.s.utf8ByteSize, + it.internalState.s = it'.internalState.s ∧ + it.internalState.offset ≠ 0 ∧ + it'.internalState.offset = it.internalState.offset.dec ∧ + it.internalState.s.getUtf8Byte it.internalState.offset.dec h1 = out + | .skip _ => False + | .done => it.internalState.offset = 0 + step := fun ⟨s, offset, hinv⟩ => + if h : offset ≠ 0 then + let nextOffset := offset.dec + have hbound := by + simp [String.Pos.le_iff, nextOffset] at h hinv ⊢ + omega + have hinv := by + simp [String.Pos.le_iff, nextOffset] at hinv ⊢ + omega + have hiter := by simp [nextOffset, hbound, h] + pure ⟨.yield ⟨s, nextOffset, hinv⟩ (s.getUtf8Byte nextOffset hbound), hiter⟩ + else + pure ⟨.done, by simpa using h⟩ + +private def finitenessRelation [Pure m] : + Std.Iterators.FinitenessRelation (RevByteIterator) m where + rel := InvImage WellFoundedRelation.rel + (fun it => it.internalState.offset.byteIdx) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + simp_wf + obtain ⟨step, h, h'⟩ := h + cases step + · cases h + obtain ⟨h1, h2, h3, h4, h5⟩ := h' + rw [h4] + simp at h1 h3 ⊢ + omega + · cases h' + · cases h + +@[no_expose] +instance [Pure m] : Std.Iterators.Finite RevByteIterator m := + .of_finitenessRelation finitenessRelation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollect RevByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorCollectPartial RevByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoop RevByteIterator m n := + .defaultImplementation + +instance [Monad m] [Monad n] : Std.Iterators.IteratorLoopPartial RevByteIterator m n := + .defaultImplementation + +docs_to_verso revBytes + +end RevByteIterator + +def lines.lineMap (s : Slice) : Slice := + if let some s := s.dropSuffix? '\n' then + if let some s := s.dropSuffix? '\r' then + s + else + s + else + s + +/-- +Creates an iterator over all lines in {name}`s` with the line ending characters `\r\n` or `\n` being +stripped. + +Examples: + * {lean}`"foo\r\nbar\n\nbaz\n".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice]` + * {lean}`"foo\r\nbar\n\nbaz".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice]` + * {lean}`"foo\r\nbar\n\nbaz\r".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz\r".toSlice]` +-/ +def lines (s : Slice) := + s.splitInclusive '\n' |>.map lines.lineMap + +/-- +Folds a function over a slice from the start, accumulating a value starting with {name}`init`. The +accumulated value is combined with each character in order, using {name}`f`. + +Examples: + * {lean}`"coffee tea water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 2` + * {lean}`"coffee tea and water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 3` + * {lean}`"coffee tea water".toSlice.foldl (·.push ·) "" = "coffee tea water"` +-/ +@[inline] +def foldl {α : Type u} (f : α → Char → α) (init : α) (s : Slice) : α := + Std.Iterators.Iter.fold f init (chars s) + +/-- +Folds a function over a slice from the end, accumulating a value starting with {name}`init`. The +accumulated value is combined with each character in reverse order, using {name}`f`. + +Examples: + * {lean}`"coffee tea water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 2` + * {lean}`"coffee tea and water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 3` + * {lean}`"coffee tea water".toSlice.foldr (fun c s => s.push c) "" = "retaw aet eeffoc"` +-/ +@[inline] +def foldr {α : Type u} (f : Char → α → α) (init : α) (s : Slice) : α := + Std.Iterators.Iter.fold (flip f) init (revChars s) + +/-- +Checks whether the slice can be interpreted as the decimal representation of a natural number. + +A slice can be interpreted as a decimal natural number if it is not empty and all the characters in +it are digits. + +Use {name (scope := "Init.Data.String.Slice")}`toNat?` or +{name (scope := "Init.Data.String.Slice")}`toNat!` to convert such a slice to a natural number. + +Examples: + * {lean}`"".toSlice.isNat = false` + * {lean}`"0".toSlice.isNat = true` + * {lean}`"5".toSlice.isNat = true` + * {lean}`"05".toSlice.isNat = true` + * {lean}`"587".toSlice.isNat = true` + * {lean}`"-587".toSlice.isNat = false` + * {lean}`" 5".toSlice.isNat = false` + * {lean}`"2+3".toSlice.isNat = false` + * {lean}`"0xff".toSlice.isNat = false` +-/ +@[inline] +def isNat (s : Slice) : Bool := + !s.isEmpty && s.all Char.isDigit + +/-- +Interprets a slice as the decimal representation of a natural number, returning it. Returns +{name}`none` if the slice does not contain a decimal natural number. + +A slice can be interpreted as a decimal natural number if it is not empty and all the characters in +it are digits. + +Use {name}`isNat` to check whether {name}`toNat?` would return {name}`some`. +{name (scope := "Init.Data.String.Slice")}`toNat!` is an alternative that panics instead of +returning {name}`none` when the slice is not a natural number. + +Examples: + * {lean}`"".toSlice.toNat? = none` + * {lean}`"0".toSlice.toNat? = some 0` + * {lean}`"5".toSlice.toNat? = some 5` + * {lean}`"587".toSlice.toNat? = some 587` + * {lean}`"-587".toSlice.toNat? = none` + * {lean}`" 5".toSlice.toNat? = none` + * {lean}`"2+3".toSlice.toNat? = none` + * {lean}`"0xff".toSlice.toNat? = none` +-/ +def toNat? (s : Slice) : Option Nat := + if s.isNat then + some <| s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0 + else + none + +/-- +Interprets a slice as the decimal representation of a natural number, returning it. Panics if the +slice does not contain a decimal natural number. + +A slice can be interpreted as a decimal natural number if it is not empty and all the characters in +it are digits. + +Use {name}`isNat` to check whether {name}`toNat!` would return a value. {name}`toNat?` is a safer +alternative that returns {name}`none` instead of panicking when the string is not a natural number. + +Examples: + * {lean}`"0".toSlice.toNat! = 0` + * {lean}`"5".toSlice.toNat! = 5` + * {lean}`"587".toSlice.toNat! = 587` +-/ +def toNat! (s : Slice) : Nat := + if s.isNat then + s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0 + else + panic! "Nat expected" + +/-- +Returns the first character in {name}`s`. If {name}`s` is empty, {name}`none`. + +Examples: +* {lean}`"abc".toSlice.front? = some 'a'` +* {lean}`"".toSlice.front? = none` +-/ +@[inline] +def front? (s : Slice) : Option Char := + s.startPos.get? + +/-- +Returns the first character in {name}`s`. If {name}`s` is empty, returns {lean}`(default : Char)`. + +Examples: +* {lean}`"abc".toSlice.front = 'a'` +* {lean}`"".toSlice.front = (default : Char)` +-/ +@[inline] +def front (s : Slice) : Char := + s.front?.getD default + +/-- +Returns the last character in {name}`s`. If {name}`s` is empty, returns {name}`none`. + +Examples: +* {lean}`"abc".toSlice.back? = some 'c'` +* {lean}`"".toSlice.back? = none` +-/ +@[inline] +def back? (s : Slice) : Option Char := + s.endPos.prev? |>.bind (·.get?) + +/-- +Returns the last character in {name}`s`. If {name}`s` is empty, returns {lean}`(default : Char)`. + +Examples: +* {lean}`"abc".toSlice.back = 'c'` +* {lean}`"".toSlice.back = (default : Char)` +-/ +@[inline] +def back (s : Slice) : Char := + s.back?.getD default + +end String.Slice diff --git a/src/Init/Data/UInt/Basic.lean b/src/Init/Data/UInt/Basic.lean index 486055c39a..4a0f4e3b0d 100644 --- a/src/Init/Data/UInt/Basic.lean +++ b/src/Init/Data/UInt/Basic.lean @@ -183,6 +183,14 @@ def Bool.toUInt8 (b : Bool) : UInt8 := if b then 1 else 0 instance : Max UInt8 := maxOfLe instance : Min UInt8 := minOfLe +/-- +If `b` is the ASCII value of an uppercase character return the corresponding +lowercase value, otherwise leave it untouched. +-/ +@[inline] +def UInt8.toAsciiLower (b : UInt8) : UInt8 := + if b >= 65 && b <= 90 then (b + 32) else b + /-- Converts a `Fin UInt16.size` into the corresponding `UInt16`. -/ @[inline] def UInt16.ofFin (a : Fin UInt16.size) : UInt16 := ⟨⟨a⟩⟩ diff --git a/src/include/lean/lean.h b/src/include/lean/lean.h index 3442a19eb4..0e028df7a7 100644 --- a/src/include/lean/lean.h +++ b/src/include/lean/lean.h @@ -1155,6 +1155,9 @@ static inline uint8_t lean_string_dec_eq(b_lean_obj_arg s1, b_lean_obj_arg s2) { static inline uint8_t lean_string_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2) { return lean_string_lt(s1, s2); } LEAN_EXPORT uint64_t lean_string_hash(b_lean_obj_arg); LEAN_EXPORT lean_obj_res lean_string_of_usize(size_t); +LEAN_EXPORT uint8_t lean_slice_memcmp(b_lean_obj_arg s1, b_lean_obj_arg s2, b_lean_obj_arg lstart, b_lean_obj_arg rstart, b_lean_obj_arg len); +LEAN_EXPORT uint64_t lean_slice_hash(b_lean_obj_arg); +LEAN_EXPORT uint8_t lean_slice_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2); /* Thunks */ diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index 8e068ece86..100b1f5b23 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -2331,6 +2331,45 @@ extern "C" LEAN_EXPORT obj_res lean_string_of_usize(size_t n) { return mk_ascii_string_unchecked(std::to_string(n)); } +size_t lean_slice_size(b_obj_arg slice) { + b_obj_res start = lean_ctor_get(slice, 1); + lean_assert(lean_is_scalar(start)); + b_obj_res end = lean_ctor_get(slice, 2); + lean_assert(lean_is_scalar(end)); + return lean_unbox(end) - lean_unbox(start); +} + +char const * lean_slice_base(b_obj_arg slice) { + b_obj_res string = lean_ctor_get(slice, 0); + b_obj_res offset = lean_ctor_get(slice, 1); + lean_assert(lean_is_scalar(offset)); + return lean_string_cstr(string) + lean_unbox(offset); +} + +extern "C" LEAN_EXPORT uint8_t lean_slice_memcmp(b_obj_arg s1, b_obj_arg s2, b_obj_arg lstart, b_obj_arg rstart, b_obj_arg len) { + // Thanks to the proof arguments we know that lstart, rstart and len are all scalars. + lean_assert(lean_is_scalar(lstart)); + lean_assert(lean_is_scalar(rstart)); + lean_assert(lean_is_scalar(len)); + + char const * lbase = lean_slice_base(s1) + lean_unbox(lstart); + char const * rbase = lean_slice_base(s2) + lean_unbox(rstart); + return std::memcmp(lbase, rbase, lean_unbox(len)) == 0; +} + +extern "C" LEAN_EXPORT uint64_t lean_slice_hash(b_obj_arg s) { + size_t sz = lean_slice_size(s); + char const * str = lean_slice_base(s); + return hash_str(sz, (unsigned char const *) str, 11); +} + +extern "C" LEAN_EXPORT uint8_t lean_slice_dec_lt(object * s1, object * s2) { + size_t sz1 = lean_slice_size(s1); + size_t sz2 = lean_slice_size(s2); + int r = std::memcmp(lean_slice_base(s1), lean_slice_base(s2), std::min(sz1, sz2)); + return r < 0 || (r == 0 && sz1 < sz2); +} + // ======================================= // ByteArray & FloatArray diff --git a/tests/lean/run/string_slice.lean b/tests/lean/run/string_slice.lean new file mode 100644 index 0000000000..cbca34218b --- /dev/null +++ b/tests/lean/run/string_slice.lean @@ -0,0 +1,206 @@ +module +import Std.Data.HashSet.Basic + +/-! +Tests for `String.Slice` functions +-/ + +#guard "".toSlice.isEmpty = true +#guard " ".toSlice.isEmpty = false +#guard (" ".toSlice.drop 1).isEmpty = true + +#guard "abc".toSlice == "abc".toSlice +#guard "abcdefg".toSlice.dropEnd 4 == "abc".toSlice +#guard "abcdefg".toSlice.dropEnd 3 != "abc".toSlice + +#guard "red green blue".toSlice.startsWith "red" = true +#guard "red green blue".toSlice.startsWith "green" = false +#guard "red green blue".toSlice.startsWith "" = true +#guard "red green blue".toSlice.startsWith 'r' = true +#guard "red green blue".toSlice.startsWith Char.isLower = true + +#guard ("coffee tea water".toSlice.split Char.isWhitespace).allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice] +#guard ("coffee tea water".toSlice.split ' ').allowNontermination.toList == ["coffee".toSlice, "tea".toSlice, "water".toSlice] +#guard ("coffee tea water".toSlice.split " tea ").allowNontermination.toList == ["coffee".toSlice, "water".toSlice] +#guard ("baaab".toSlice.split "aa").allowNontermination.toList == ["b".toSlice, "ab".toSlice] + +#guard ("coffee tea water".toSlice.splitInclusive Char.isWhitespace).allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice] +#guard ("coffee tea water".toSlice.splitInclusive ' ').allowNontermination.toList == ["coffee ".toSlice, "tea ".toSlice, "water".toSlice] +#guard ("coffee tea water".toSlice.splitInclusive " tea ").allowNontermination.toList == ["coffee tea ".toSlice, "water".toSlice] +#guard ("a".toSlice.splitInclusive (fun (_ : Char) => true)).allowNontermination.toList == ["a".toSlice] +#guard ("baaab".toSlice.splitInclusive "aa").allowNontermination.toList == ["baa".toSlice, "ab".toSlice] + +#guard "red green blue".toSlice.drop 4 == "green blue".toSlice +#guard "red green blue".toSlice.drop 10 == "blue".toSlice +#guard "red green blue".toSlice.drop 50 == "".toSlice + +#guard "red green blue".toSlice.dropWhile Char.isLower == " green blue".toSlice +#guard "red green blue".toSlice.dropWhile 'r' == "ed green blue".toSlice +#guard "red red green blue".toSlice.dropWhile "red " == "green blue".toSlice +#guard "red green blue".toSlice.dropWhile (fun (_ : Char) => true) == "".toSlice + +#guard "abc".toSlice.trimAsciiStart == "abc".toSlice +#guard " abc".toSlice.trimAsciiStart == "abc".toSlice +#guard "abc \t ".toSlice.trimAsciiStart == "abc \t ".toSlice +#guard " abc ".toSlice.trimAsciiStart == "abc ".toSlice +#guard "abc\ndef\n".toSlice.trimAsciiStart == "abc\ndef\n".toSlice + +#guard "red green blue".toSlice.take 3 == "red".toSlice +#guard "red green blue".toSlice.take 1 == "r".toSlice +#guard "red green blue".toSlice.take 0 == "".toSlice +#guard "red green blue".toSlice.take 100 == "red green blue".toSlice + +#guard "red green blue".toSlice.takeWhile Char.isLower == "red".toSlice +#guard "red green blue".toSlice.takeWhile 'r' == "r".toSlice +#guard "red red green blue".toSlice.takeWhile "red " == "red red ".toSlice +#guard "red green blue".toSlice.takeWhile (fun (_ : Char) => true) == "red green blue".toSlice + +#guard "red green blue".toSlice.dropPrefix? "red " == some "green blue".toSlice +#guard "red green blue".toSlice.dropPrefix? "reed " == none +#guard "red green blue".toSlice.dropPrefix? 'r' == some "ed green blue".toSlice +#guard "red green blue".toSlice.dropPrefix? Char.isLower == some "ed green blue".toSlice + +#guard "red green blue".toSlice.dropPrefix "red " == "green blue".toSlice +#guard "red green blue".toSlice.dropPrefix "reed " == "red green blue".toSlice +#guard "red green blue".toSlice.dropPrefix 'r' == "ed green blue".toSlice +#guard "red green blue".toSlice.dropPrefix Char.isLower == "ed green blue".toSlice + +#guard ("coffee tea water".toSlice.find? Char.isWhitespace).map (·.get!) == some ' ' +#guard "tea".toSlice.find? (fun (c : Char) => c == 'X') == none +#guard ("coffee tea water".toSlice.find? "tea").map (·.get!) == some 't' + +#guard "coffee tea water".toSlice.contains Char.isWhitespace = true +#guard "tea".toSlice.contains (fun (c : Char) => c == 'X') = false +#guard "coffee tea water".toSlice.contains "tea" = true + +#guard "brown".toSlice.all Char.isLower = true +#guard "brown and orange".toSlice.all Char.isLower = false +#guard "aaaaaa".toSlice.all 'a' = true +#guard "aaaaaa".toSlice.all "aa" = true + +#guard "red green blue".toSlice.endsWith "blue" = true +#guard "red green blue".toSlice.endsWith "green" = false +#guard "red green blue".toSlice.endsWith "" = true +#guard "red green blue".toSlice.endsWith 'e' = true +#guard "red green blue".toSlice.endsWith Char.isLower = true + +#guard ("coffee tea water".toSlice.revSplit Char.isWhitespace).allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice] +#guard ("coffee tea water".toSlice.revSplit ' ').allowNontermination.toList == ["water".toSlice, "tea".toSlice, "coffee".toSlice] + +#guard "red green blue".toSlice.dropEnd 5 == "red green".toSlice +#guard "red green blue".toSlice.dropEnd 11 == "red".toSlice +#guard "red green blue".toSlice.dropEnd 50 == "".toSlice + +#guard "red green blue".toSlice.dropEndWhile Char.isLower == "red green ".toSlice +#guard "red green blue".toSlice.dropEndWhile 'e' == "red green blu".toSlice +#guard "red green blue".toSlice.dropEndWhile (fun (_ : Char) => true) == "".toSlice + +#guard "abc".toSlice.trimAsciiEnd == "abc".toSlice +#guard " abc".toSlice.trimAsciiEnd == " abc".toSlice +#guard "abc \t ".toSlice.trimAsciiEnd == "abc".toSlice +#guard " abc ".toSlice.trimAsciiEnd == " abc".toSlice +#guard "abc\ndef\n".toSlice.trimAsciiEnd == "abc\ndef".toSlice + +#guard "red green blue".toSlice.takeEnd 4 == "blue".toSlice +#guard "red green blue".toSlice.takeEnd 1 == "e".toSlice +#guard "red green blue".toSlice.takeEnd 0 == "".toSlice +#guard "red green blue".toSlice.takeEnd 100 == "red green blue".toSlice + +#guard "red green blue".toSlice.takeEndWhile Char.isLower == "blue".toSlice +#guard "red green blue".toSlice.takeEndWhile 'e' == "e".toSlice +#guard "red green blue".toSlice.takeEndWhile (fun (_ : Char) => true) == "red green blue".toSlice + +#guard "red green blue".toSlice.dropSuffix? " blue" == some "red green".toSlice +#guard "red green blue".toSlice.dropSuffix? "bluu " == none +#guard "red green blue".toSlice.dropSuffix? 'e' == some "red green blu".toSlice +#guard "red green blue".toSlice.dropSuffix? Char.isLower == some "red green blu".toSlice + +#guard "red green blue".toSlice.dropSuffix " blue" == "red green".toSlice +#guard "red green blue".toSlice.dropSuffix "bluu " == "red green blue".toSlice +#guard "red green blue".toSlice.dropSuffix 'e' == "red green blu".toSlice +#guard "red green blue".toSlice.dropSuffix Char.isLower == "red green blu".toSlice + +#guard "abc".toSlice.trimAscii == "abc".toSlice +#guard " abc".toSlice.trimAscii == "abc".toSlice +#guard "abc \t ".toSlice.trimAscii == "abc".toSlice +#guard " abc ".toSlice.trimAscii == "abc".toSlice +#guard "abc\ndef\n".toSlice.trimAscii == "abc\ndef".toSlice + + +#guard ({} : Std.HashSet _).insert "abc".toSlice |>.contains "abc".toSlice +#guard (({} : Std.HashSet _).insert "abc".toSlice |>.insert "abc".toSlice |>.size) == 1 + +#guard "abc".toSlice ≤ "abc".toSlice +#guard "abc".toSlice < "abcd".toSlice +#guard "abc".toSlice < "zbc".toSlice +#guard "".toSlice < "zbc".toSlice +#guard "".toSlice ≤ "".toSlice + +#guard "abc".toSlice.chars.toList = ['a', 'b', 'c'] +#guard "ab∀c".toSlice.chars.toList = ['a', 'b', '∀', 'c'] + +#guard "abc".toSlice.revChars.toList = ['c', 'b', 'a'] +#guard "ab∀c".toSlice.revChars.toList = ['c', '∀', 'b', 'a'] + +#guard ("abc".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', 'c'] +#guard ("abc".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2] +#guard ("ab∀c".toSlice.positions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['a', 'b', '∀', 'c'] +#guard ("ab∀c".toSlice.positions.map (·.val.offset.byteIdx) |>.toList) = [0, 1, 2, 5] + +#guard ("abc".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', 'b', 'a'] +#guard ("abc".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [2, 1, 0] +#guard ("ab∀c".toSlice.revPositions.map (fun ⟨p, h⟩ => p.get h) |>.toList) = ['c', '∀', 'b', 'a'] +#guard ("ab∀c".toSlice.revPositions.map (·.val.offset.byteIdx) |>.toList) = [5, 2, 1, 0] + +#guard "abc".toSlice.bytes.toList = [97, 98, 99] +#guard "ab∀c".toSlice.bytes.toList = [97, 98, 226, 136, 128, 99] + +#guard "abc".toSlice.revBytes.toList = [99, 98, 97] +#guard "ab∀c".toSlice.revBytes.toList = [99, 128, 136, 226, 98, 97] + +#guard "foo\r\nbar\n\nbaz\n".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice] +#guard "foo\r\nbar\n\nbaz".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz".toSlice] +#guard "foo\r\nbar\n\nbaz\r".toSlice.lines.allowNontermination.toList == ["foo".toSlice, "bar".toSlice, "".toSlice, "baz\r".toSlice] + +#guard "coffee tea water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 2 +#guard "coffee tea and water".toSlice.foldl (fun n c => if c.isWhitespace then n + 1 else n) 0 = 3 +#guard "coffee tea water".toSlice.foldl (·.push ·) "" = "coffee tea water" + +#guard "coffee tea water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 2 +#guard "coffee tea and water".toSlice.foldr (fun c n => if c.isWhitespace then n + 1 else n) 0 = 3 +#guard "coffee tea water".toSlice.foldr (fun c s => s.push c) "" = "retaw aet eeffoc" + +#guard "".toSlice.isNat = false +#guard "0".toSlice.isNat = true +#guard "5".toSlice.isNat = true +#guard "05".toSlice.isNat = true +#guard "587".toSlice.isNat = true +#guard "-587".toSlice.isNat = false +#guard " 5".toSlice.isNat = false +#guard "2+3".toSlice.isNat = false +#guard "0xff".toSlice.isNat = false + +#guard "".toSlice.toNat? = none +#guard "0".toSlice.toNat? = some 0 +#guard "5".toSlice.toNat? = some 5 +#guard "587".toSlice.toNat? = some 587 +#guard "-587".toSlice.toNat? = none +#guard " 5".toSlice.toNat? = none +#guard "2+3".toSlice.toNat? = none +#guard "0xff".toSlice.toNat? = none + +#guard "0".toSlice.toNat! = 0 +#guard "5".toSlice.toNat! = 5 +#guard "587".toSlice.toNat! = 587 + +#guard "abc".toSlice.front? = some 'a' +#guard "".toSlice.front? = none + +#guard "abc".toSlice.front = 'a' +#guard "".toSlice.front = (default : Char) + +#guard "abc".toSlice.back? = some 'c' +#guard "".toSlice.back? = none + +#guard "abc".toSlice.back = 'c' +#guard "".toSlice.back = (default : Char)