lean4-htt/src/Init/Data/String/Pattern/Basic.lean
Markus Himmel 9c5d2bf62e
refactor: rename ForwardPattern.dropPrefix? to ForwardPattern.skipPrefix? (#12984)
This PR renames the function `ForwardPattern.dropPrefix?` to
`ForwardPattern.skipPrefix`?

This function `(s : String.Slice) -> Option s.Pos` is not to be confused
with `String.Slice.dropPrefix? : (s : String.Slice) -> Option
String.Slice`.
2026-03-19 13:05:55 +00:00

464 lines
19 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Henrik Böving, Markus Himmel
-/
module
prelude
public import Init.Data.Iterators.Consumers.Monadic.Loop
public import Init.Data.String.Defs
public import Init.Data.String.Basic
public import Init.Data.String.FindPos
import Init.Data.String.Lemmas.FindPos
import Init.Data.Iterators.Consumers.Loop
import Init.Omega
import Init.Data.String.Lemmas.IsEmpty
import Init.Data.String.Termination
import Init.Data.String.OrderInstances
import Init.Data.String.Lemmas.Order
set_option doc.verso true
/-!
This module defines the notion of patterns which is central to the {name}`String.Slice` and
{name}`String` API. All functions on {name}`String.Slice` and {name}`String` that
“search for something” are polymorphic over a pattern instead of taking just one particular kind
of pattern such as a {name}`Char`. The key components are:
- {name (scope := "Init.Data.String.Pattern.Basic")}`ToForwardSearcher`
- {name (scope := "Init.Data.String.Pattern.Basic")}`ForwardPattern`
- {name (scope := "Init.Data.String.Pattern.Basic")}`ToBackwardSearcher`
- {name (scope := "Init.Data.String.Pattern.Basic")}`BackwardPattern`
-/
public section
namespace String.Slice.Pattern
/--
A step taken during the traversal of a {name}`Slice` by a forward or backward searcher.
-/
inductive SearchStep (s : Slice) where
/--
The subslice starting at {name}`startPos` and ending at {name}`endPos` did not match the pattern.
-/
| rejected (startPos endPos : s.Pos)
/--
The subslice starting at {name}`startPos` and ending at {name}`endPos` matches the pattern.
-/
| matched (startPos endPos : s.Pos)
deriving Inhabited, BEq
namespace SearchStep
/-- The start position of a {name}`SearchStep`. -/
@[inline]
def startPos {s : Slice} (st : SearchStep s) : s.Pos :=
match st with
| .rejected startPos _ => startPos
| .matched startPos _ => startPos
@[simp]
theorem startPos_rejected {s : Slice} {p q : s.Pos} : (SearchStep.rejected p q).startPos = p := (rfl)
@[simp]
theorem startPos_matched {s : Slice} {p q : s.Pos} : (SearchStep.matched p q).startPos = p := (rfl)
/-- The end position of a {name}`SearchStep`. -/
@[inline]
def endPos {s : Slice} (st : SearchStep s) : s.Pos :=
match st with
| .rejected _ endPos => endPos
| .matched _ endPos => endPos
@[simp]
theorem endPos_rejected {s : Slice} {p q : s.Pos} : (SearchStep.rejected p q).endPos = q := (rfl)
@[simp]
theorem endPos_matched {s : Slice} {p q : s.Pos} : (SearchStep.matched p q).endPos = q := (rfl)
/--
Converts a {lean}`SearchStep (s.sliceFrom p)` into a {lean}`SearchStep s` by applying
{name}`Slice.Pos.ofSliceFrom` to the start and end position.
-/
def ofSliceFrom {s : Slice} {p : s.Pos} (st : SearchStep (s.sliceFrom p)) : SearchStep s :=
match st with
| .rejected l r => .rejected (Slice.Pos.ofSliceFrom l) (Slice.Pos.ofSliceFrom r)
| .matched l r => .matched (Slice.Pos.ofSliceFrom l) (Slice.Pos.ofSliceFrom r)
@[simp]
theorem startPos_ofSliceFrom {s : Slice} {p : s.Pos} {st : SearchStep (s.sliceFrom p)} :
st.ofSliceFrom.startPos = Slice.Pos.ofSliceFrom st.startPos := by
cases st <;> simp [ofSliceFrom]
@[simp]
theorem endPos_ofSliceFrom {s : Slice} {p : s.Pos} {st : SearchStep (s.sliceFrom p)} :
st.ofSliceFrom.endPos = Slice.Pos.ofSliceFrom st.endPos := by
cases st <;> simp [ofSliceFrom]
end SearchStep
/--
Provides simple pattern matching capabilities from the start of a {name}`Slice`.
-/
class ForwardPattern {ρ : Type} (pat : ρ) where
/--
Checks whether the slice starts with the pattern. If it does, the slice is returned with the
prefix removed; otherwise the result is {name}`none`.
-/
skipPrefix? : (s : Slice) → Option s.Pos
/--
Checks whether the slice starts with the pattern. If it does, the slice is returned with the
prefix removed; otherwise the result is {name}`none`.
-/
skipPrefixOfNonempty? : (s : Slice) → (h : s.isEmpty = false) → Option s.Pos := fun s _ => skipPrefix? s
/--
Checks whether the slice starts with the pattern.
-/
startsWith : (s : Slice) → Bool := fun s => (skipPrefix? s).isSome
@[deprecated ForwardPattern.dropPrefix? (since := "2026-03-19")]
def ForwardPattern.dropPrefix? {ρ : Type} (pat : ρ) [ForwardPattern pat] (s : Slice) : Option s.Pos :=
ForwardPattern.skipPrefix? pat s
/--
A lawful forward pattern is one where the three functions {name}`ForwardPattern.skipPrefix?`,
{name}`ForwardPattern.skipPrefixOfNonempty?` and {name}`ForwardPattern.startsWith` agree for any
given input slice.
Note that this is a relatively weak condition. It is non-uniform in the sense that the functions
can still return completely different results on different slices, even if they represent the same
string.
There is a stronger lawfulness typeclass {lit}`LawfulForwardPatternModel` that asserts that the
{name}`ForwardPattern.skipPrefix?` function behaves like a function that drops the longest prefix
according to some notion of matching.
-/
class LawfulForwardPattern {ρ : Type} (pat : ρ) [ForwardPattern pat] : Prop where
skipPrefixOfNonempty?_eq {s : Slice} (h) :
ForwardPattern.skipPrefixOfNonempty? pat s h = ForwardPattern.skipPrefix? pat s
startsWith_eq (s : Slice) :
ForwardPattern.startsWith pat s = (ForwardPattern.skipPrefix? pat s).isSome
/--
A strict forward pattern is one which never drops an empty prefix.
This condition ensures that the default searcher derived from the forward pattern is a finite
iterator.
-/
class StrictForwardPattern {ρ : Type} (pat : ρ) [ForwardPattern pat] : Prop where
ne_startPos {s : Slice} (h) (q) :
ForwardPattern.skipPrefixOfNonempty? pat s h = some q → q ≠ s.startPos
/--
Provides a conversion from a pattern to an iterator of {name}`SearchStep` that searches for matches
of the pattern from the start towards the end of a {name}`Slice`.
While these operations can be implemented on top of {name}`ForwardPattern`, some patterns allow
for more efficient implementations. For example, a searcher for {name}`String` patterns derived
from the {name}`ForwardPattern` instance on strings would try to match the pattern at every
position in the string, but more efficient string matching routines are known. Indeed, the Lean
standard library uses the Knuth-Morris-Pratt algorithm. See the module
{module -checked}`Init.Data.String.Pattern.String` for the implementation.
This class can be used to provide such an efficient implementation. If there is no
need to specialize in this fashion, then
{name (scope := "Init.Data.String.Pattern.Basic")}`ToForwardSearcher.defaultImplementation` can be
used to automatically derive an instance.
-/
class ToForwardSearcher {ρ : Type} (pat : ρ) (σ : outParam (Slice → Type)) where
/--
Builds an iterator of {name}`SearchStep` corresponding to matches of {name}`pat` along the slice
{name}`s`. The {name}`SearchStep`s returned by this iterator must contain ranges that are
adjacent, non-overlapping and cover all of {name}`s`.
-/
toSearcher : (s : Slice) → Std.Iter (α := σ s) (SearchStep s)
namespace ToForwardSearcher
structure DefaultForwardSearcher {ρ : Type} (pat : ρ) (s : Slice) where
currPos : s.Pos
deriving Inhabited
namespace DefaultForwardSearcher
variable {ρ : Type} (pat : ρ)
@[inline]
def iter (s : Slice) : Std.Iter (α := DefaultForwardSearcher pat s) (SearchStep s) :=
⟨⟨s.startPos⟩⟩
instance (s : Slice) [ForwardPattern pat] :
Std.Iterator (DefaultForwardSearcher pat s) Id (SearchStep s) where
IsPlausibleStep it
| .yield it' (.rejected p₁ p₂) => ∃ (h : it.internalState.currPos ≠ s.endPos),
ForwardPattern.skipPrefixOfNonempty? pat (s.sliceFrom it.internalState.currPos) (by simpa) = none ∧
p₁ = it.internalState.currPos ∧ p₂ = it.internalState.currPos.next h ∧
it'.internalState.currPos = it.internalState.currPos.next h
| .yield it' (.matched p₁ p₂) => ∃ (h : it.internalState.currPos ≠ s.endPos), ∃ pos,
ForwardPattern.skipPrefixOfNonempty? pat (s.sliceFrom it.internalState.currPos) (by simpa) = some pos ∧
p₁ = it.internalState.currPos ∧ p₂ = Slice.Pos.ofSliceFrom pos ∧
it'.internalState.currPos = Slice.Pos.ofSliceFrom pos
| .done => it.internalState.currPos = s.endPos
| .skip _ => False
step it :=
if h : it.internalState.currPos = s.endPos then
pure (.deflate ⟨.done, by simp [h]⟩)
else
match h' : ForwardPattern.skipPrefixOfNonempty? pat (s.sliceFrom it.internalState.currPos) (by simpa) with
| some pos =>
pure (.deflate ⟨.yield ⟨⟨Slice.Pos.ofSliceFrom pos⟩⟩
(.matched it.internalState.currPos (Slice.Pos.ofSliceFrom pos)), by simp [h, h']⟩)
| none =>
pure (.deflate ⟨.yield ⟨⟨it.internalState.currPos.next h⟩⟩
(.rejected it.internalState.currPos (it.internalState.currPos.next h)), by simp [h, h']⟩)
private def finitenessRelation (s : Slice) [ForwardPattern pat] [StrictForwardPattern pat] :
Std.Iterators.FinitenessRelation (DefaultForwardSearcher pat s) Id where
Rel := InvImage WellFoundedRelation.rel (fun it => it.internalState.currPos)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf
obtain ⟨step, h, h'⟩ := h
match step with
| .yield it'' (.rejected p₁ p₂) =>
obtain ⟨_, ⟨-, -, -, h'⟩⟩ := h'
cases h
simp [h']
| .yield it'' (.matched p₁ p₂) =>
obtain ⟨_, pos, ⟨h₀, -, -, h'⟩⟩ := h'
cases h
have := StrictForwardPattern.ne_startPos _ _ h₀
rw [h']
exact Std.lt_of_le_of_lt Slice.Pos.le_ofSliceFrom
(Slice.Pos.ofSliceFrom_lt_ofSliceFrom_iff.2 ((Slice.Pos.startPos_lt_iff _).2 this))
instance {s : Slice} [ForwardPattern pat] [StrictForwardPattern pat] :
Std.Iterators.Finite (DefaultForwardSearcher pat s) Id :=
.of_finitenessRelation (finitenessRelation pat s)
instance [ForwardPattern pat] : Std.IteratorLoop (DefaultForwardSearcher pat s) Id Id :=
.defaultImplementation
end DefaultForwardSearcher
/--
The default implementation of {name}`ToForwardSearcher` repeatedly tries to match the pattern using
the given {name}`ForwardPattern` instance.
It is sometimes possible to give a more efficient implementation; see {name}`ToForwardSearcher`
for more details.
-/
@[inline, implicit_reducible]
def defaultImplementation [ForwardPattern pat] :
ToForwardSearcher pat (DefaultForwardSearcher pat) where
toSearcher := DefaultForwardSearcher.iter pat
end ToForwardSearcher
namespace Internal
@[extern "lean_string_memcmp"]
def memcmpStr (lhs rhs : @& String) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
(len : @& String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
go 0
where
go (curr : String.Pos.Raw) : Bool :=
if h : curr < len then
have hl := by
simp [Pos.Raw.le_iff, Pos.Raw.lt_iff] at h h1 ⊢
omega
have hr := by
simp [Pos.Raw.le_iff, Pos.Raw.lt_iff] at h h2 ⊢
omega
if lhs.getUTF8Byte (curr.offsetBy lstart) hl == rhs.getUTF8Byte (curr.offsetBy rstart) hr then
go curr.inc
else
false
else
true
termination_by len.byteIdx - curr.byteIdx
decreasing_by
simp [Pos.Raw.lt_iff] at h ⊢
omega
@[inline]
def memcmpSlice (lhs rhs : Slice) (lstart : String.Pos.Raw) (rstart : String.Pos.Raw)
(len : String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
memcmpStr
lhs.str
rhs.str
(lstart.offsetBy lhs.startInclusive.offset)
(rstart.offsetBy rhs.startInclusive.offset)
len
(by
have := lhs.startInclusive_le_endExclusive
have := lhs.endExclusive.isValid.le_utf8ByteSize
simp [String.Pos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq,
-String.Pos.byteIdx_offset_le_utf8ByteSize] at *
omega)
(by
have := rhs.startInclusive_le_endExclusive
have := rhs.endExclusive.isValid.le_utf8ByteSize
simp [String.Pos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq,
-String.Pos.byteIdx_offset_le_utf8ByteSize] at *
omega)
end Internal
/--
Provides simple pattern matching capabilities from the end of a {name}`Slice`.
-/
class BackwardPattern {ρ : Type} (pat : ρ) where
/--
Checks whether the slice ends with the pattern. If it does, the slice is returned with the
suffix removed; otherwise the result is {name}`none`.
-/
dropSuffix? : (s : Slice) → Option s.Pos
/--
Checks whether the slice ends with the pattern. If it does, the slice is returned with the
suffix removed; otherwise the result is {name}`none`.
-/
dropSuffixOfNonempty? : (s : Slice) → (h : s.isEmpty = false) → Option s.Pos := fun s _ => dropSuffix? s
/--
Checks whether the slice ends with the pattern.
-/
endsWith : Slice → Bool := fun s => (dropSuffix? s).isSome
/--
A lawful backward pattern is one where the three functions {name}`BackwardPattern.dropSuffix?`,
{name}`BackwardPattern.dropSuffixOfNonempty?` and {name}`BackwardPattern.endsWith` agree for any
given input slice.
Note that this is a relatively weak condition. It is non-uniform in the sense that the functions
can still return completely different results on different slices, even if they represent the same
string.
There is a stronger lawfulness typeclass {lit}`LawfulBackwardPatternModel` that asserts that the
{name}`BackwardPattern.dropSuffix?` function behaves like a function that drops the longest prefix
according to some notion of matching.
-/
class LawfulBackwardPattern {ρ : Type} (pat : ρ) [BackwardPattern pat] : Prop where
dropSuffixOfNonempty?_eq {s : Slice} (h) :
BackwardPattern.dropSuffixOfNonempty? pat s h = BackwardPattern.dropSuffix? pat s
endsWith_eq (s : Slice) :
BackwardPattern.endsWith pat s = (BackwardPattern.dropSuffix? pat s).isSome
/--
A strict backward pattern is one which never drops an empty suffix.
This condition ensures that the default searcher derived from the backward pattern is a finite
iterator.
-/
class StrictBackwardPattern {ρ : Type} (pat : ρ) [BackwardPattern pat] : Prop where
ne_endPos {s : Slice} (h) (q) :
BackwardPattern.dropSuffixOfNonempty? pat s h = some q → q ≠ s.endPos
/--
Provides a conversion from a pattern to an iterator of {name}`SearchStep` searching for matches of
the pattern from the end towards the start of a {name}`Slice`.
While these operations can be implemented on top of {name}`BackwardPattern`, some patterns allow
for more efficient implementations. For example, a searcher for {name}`String` patterns derived
from the {name}`BackwardPattern` instance on strings would try to match the pattern at every
position in the string, but more efficient string matching routines are known. Indeed, the Lean
standard library uses the Knuth-Morris-Pratt algorithm. See the module
{module -checked}`Init.Data.String.Pattern.String` for the implementation.
This class can be used to provide such an efficient implementation. If there is no
need to specialize in this fashion, then
{name (scope := "Init.Data.String.Pattern.Basic")}`ToBackwardSearcher.defaultImplementation` can be
used to automatically derive an instance.
-/
class ToBackwardSearcher {ρ : Type} (pat : ρ) (σ : outParam (Slice → Type)) where
/--
Build an iterator of {name}`SearchStep` corresponding to matches of {lean}`pat` along the slice
{name}`s`. The {name}`SearchStep`s returned by this iterator must contain ranges that are
adjacent, non-overlapping and cover all of {name}`s`.
-/
toSearcher : (s : Slice) → Std.Iter (α := σ s) (SearchStep s)
namespace ToBackwardSearcher
structure DefaultBackwardSearcher {ρ : Type} (pat : ρ) (s : Slice) where
currPos : s.Pos
deriving Inhabited
namespace DefaultBackwardSearcher
variable {ρ : Type} (pat : ρ)
@[inline]
def iter (s : Slice) : Std.Iter (α := DefaultBackwardSearcher pat s) (SearchStep s) :=
⟨⟨s.endPos⟩⟩
instance (s : Slice) [BackwardPattern pat] :
Std.Iterator (DefaultBackwardSearcher pat s) Id (SearchStep s) where
IsPlausibleStep it
| .yield it' (.rejected p₁ p₂) => ∃ (h : it.internalState.currPos ≠ s.startPos),
BackwardPattern.dropSuffixOfNonempty? pat (s.sliceTo it.internalState.currPos) (by simpa) = none ∧
p₁ = it.internalState.currPos.prev h ∧ p₂ = it.internalState.currPos ∧
it'.internalState.currPos = it.internalState.currPos.prev h
| .yield it' (.matched p₁ p₂) => ∃ (h : it.internalState.currPos ≠ s.startPos), ∃ pos,
BackwardPattern.dropSuffixOfNonempty? pat (s.sliceTo it.internalState.currPos) (by simpa) = some pos ∧
p₁ = Slice.Pos.ofSliceTo pos ∧ p₂ = it.internalState.currPos ∧
it'.internalState.currPos = Slice.Pos.ofSliceTo pos
| .done => it.internalState.currPos = s.startPos
| .skip _ => False
step it :=
if h : it.internalState.currPos = s.startPos then
pure (.deflate ⟨.done, by simp [h]⟩)
else
match h' : BackwardPattern.dropSuffixOfNonempty? pat (s.sliceTo it.internalState.currPos) (by simpa) with
| some pos =>
pure (.deflate ⟨.yield ⟨⟨Slice.Pos.ofSliceTo pos⟩⟩
(.matched (Slice.Pos.ofSliceTo pos) it.internalState.currPos), by simp [h, h']⟩)
| none =>
pure (.deflate ⟨.yield ⟨⟨it.internalState.currPos.prev h⟩⟩
(.rejected (it.internalState.currPos.prev h) it.internalState.currPos), by simp [h, h']⟩)
private def finitenessRelation (s : Slice) [BackwardPattern pat] [StrictBackwardPattern pat] :
Std.Iterators.FinitenessRelation (DefaultBackwardSearcher pat s) Id where
Rel := InvImage WellFoundedRelation.rel (fun it => it.internalState.currPos.down)
wf := InvImage.wf _ WellFoundedRelation.wf
subrelation {it it'} h := by
simp_wf
obtain ⟨step, h, h'⟩ := h
match step with
| .yield it'' (.rejected p₁ p₂) =>
obtain ⟨_, ⟨-, -, -, h'⟩⟩ := h'
cases h
simp [h']
| .yield it'' (.matched p₁ p₂) =>
obtain ⟨_, pos, ⟨h₀, -, -, h'⟩⟩ := h'
cases h
have := StrictBackwardPattern.ne_endPos _ _ h₀
rw [h']
exact Std.lt_of_lt_of_le (Slice.Pos.ofSliceTo_lt_ofSliceTo_iff.2 ((Slice.Pos.lt_endPos_iff _).2 this)) Slice.Pos.ofSliceTo_le
instance {s : Slice} [BackwardPattern pat] [StrictBackwardPattern pat] :
Std.Iterators.Finite (DefaultBackwardSearcher pat s) Id :=
.of_finitenessRelation (finitenessRelation pat s)
instance [BackwardPattern pat] : Std.IteratorLoop (DefaultBackwardSearcher pat s) Id Id :=
.defaultImplementation
end DefaultBackwardSearcher
/--
The default implementation of {name}`ToBackwardSearcher` repeatedly tries to match the pattern using
the given {name}`BackwardPattern` instance.
It is sometimes possible to give a more efficient implementation; see {name}`ToBackwardSearcher`
for more details.
-/
@[inline, implicit_reducible]
def defaultImplementation [BackwardPattern pat] :
ToBackwardSearcher pat (DefaultBackwardSearcher pat) where
toSearcher := DefaultBackwardSearcher.iter pat
end ToBackwardSearcher
end String.Slice.Pattern