From 5ba467d920da2d093676024cbca38a5525abd517 Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Mon, 9 Feb 2026 10:27:47 +0100 Subject: [PATCH] feat: `LawfulForwardPatternModel` for string patterns (#12360) This PR provides a `LawfulForwardPatternModel` instance for string patterns, i.e., it proves correctness of the `dropPrefix?` and `startsWith` functions for string patterns. Note that this is "just" the correctness proof; there isn't a way to actually use it yet. API lemmas will follow. --- src/Init/Data/String/Basic.lean | 6 +- src/Init/Data/String/Defs.lean | 6 + src/Init/Data/String/Lemmas/Basic.lean | 64 ++++++++++ src/Init/Data/String/Lemmas/IsEmpty.lean | 13 ++ src/Init/Data/String/Lemmas/Pattern.lean | 1 + .../Data/String/Lemmas/Pattern/Basic.lean | 9 ++ src/Init/Data/String/Lemmas/Pattern/Char.lean | 2 + src/Init/Data/String/Lemmas/Pattern/Pred.lean | 2 + .../Data/String/Lemmas/Pattern/String.lean | 10 ++ .../String/Lemmas/Pattern/String/Basic.lean | 75 +++++++++++ .../Lemmas/Pattern/String/ForwardPattern.lean | 70 ++++++++++ src/Init/Data/String/Lemmas/Splits.lean | 120 +++++++++++++++++- 12 files changed, 373 insertions(+), 5 deletions(-) create mode 100644 src/Init/Data/String/Lemmas/Pattern/String.lean create mode 100644 src/Init/Data/String/Lemmas/Pattern/String/Basic.lean create mode 100644 src/Init/Data/String/Lemmas/Pattern/String/ForwardPattern.lean diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 23fc40da7d..f95ecc87af 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -891,6 +891,10 @@ theorem Pos.Raw.isValidForSlice_iff_isUTF8FirstByte {s : Slice} {p : Pos.Raw} : p.IsValidForSlice s ↔ (p = s.rawEndPos ∨ (∃ (h : p < s.rawEndPos), (s.getUTF8Byte p h).IsUTF8FirstByte)) := by simp [← isValid_copy_iff, isValid_iff_isUTF8FirstByte, Slice.getUTF8Byte_copy] +theorem Pos.Raw.isValidForSlice_iff_exists_append {s : Slice} {p : Pos.Raw} : + p.IsValidForSlice s ↔ ∃ t₁ t₂, s.copy = t₁ ++ t₂ ∧ p = t₁.rawEndPos := by + rw [← isValid_copy_iff, isValid_iff_exists_append] + /-- Efficiently checks whether a position is at a UTF-8 character boundary of the slice `s`. -/ @[expose] def Pos.Raw.isValidForSlice (s : Slice) (p : Pos.Raw) : Bool := @@ -1434,7 +1438,7 @@ theorem Slice.Pos.byte_eq_byte_copy {s : Slice} {pos : s.Pos} {h} : (byte_copy _).symm /-- Given a position in `s.sliceFrom p₀`, obtain the corresponding position in `s`. -/ -@[inline] +@[inline, expose] def Slice.Pos.ofSliceFrom {s : Slice} {p₀ : s.Pos} (pos : (s.sliceFrom p₀).Pos) : s.Pos where offset := pos.offset.offsetBy p₀.offset isValidForSlice := Pos.Raw.isValidForSlice_sliceFrom.1 pos.isValidForSlice diff --git a/src/Init/Data/String/Defs.lean b/src/Init/Data/String/Defs.lean index 207ab9cbb6..cbcf150992 100644 --- a/src/Init/Data/String/Defs.lean +++ b/src/Init/Data/String/Defs.lean @@ -179,6 +179,11 @@ theorem append_left_inj {s₁ s₂ : String} (t : String) : s₁ ++ t = s₂ ++ t ↔ s₁ = s₂ := by simp [← toByteArray_inj] +@[simp] +theorem append_right_inj (s : String) {t₁ t₂ : String} : + s ++ t₁ = s ++ t₂ ↔ t₁ = t₂ := by + simp [← toByteArray_inj] + theorem append_assoc {s₁ s₂ s₃ : String} : s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃) := by simp [← toByteArray_inj, ByteArray.append_assoc] @@ -398,6 +403,7 @@ achieved by tracking the bounds by hand, the slice API is much more convenient. `String.Slice` bundles proofs to ensure that the start and end positions always delineate a valid string. For this reason, it should be preferred over `Substring.Raw`. -/ +@[ext] structure Slice where /-- The underlying strings. -/ str : String diff --git a/src/Init/Data/String/Lemmas/Basic.lean b/src/Init/Data/String/Lemmas/Basic.lean index ed8d110cdc..40650d14e6 100644 --- a/src/Init/Data/String/Lemmas/Basic.lean +++ b/src/Init/Data/String/Lemmas/Basic.lean @@ -99,4 +99,68 @@ theorem Slice.utf8ByteSize_eq_size_toByteArray_copy {s : Slice} : s.utf8ByteSize = s.copy.toByteArray.size := by simp [utf8ByteSize_eq] +section Iterate + +/- +These lemmas are slightly evil because they are non-definitional equalities between slices, but they +are useful and they are at least equalities between slices with definitionally equal underlying +strings, so it should be fine. +-/ + +@[simp] +theorem Slice.sliceTo_sliceFrom {s : Slice} {pos pos'} : + (s.sliceFrom pos).sliceTo pos' = + s.slice pos (Slice.Pos.ofSliceFrom pos') Slice.Pos.le_ofSliceFrom := by + ext <;> simp [String.Pos.ext_iff, Pos.Raw.offsetBy_assoc] + +@[simp] +theorem Slice.sliceFrom_sliceTo {s : Slice} {pos pos'} : + (s.sliceTo pos).sliceFrom pos' = + s.slice (Slice.Pos.ofSliceTo pos') pos Slice.Pos.ofSliceTo_le := by + ext <;> simp [String.Pos.ext_iff] + +@[simp] +theorem Slice.sliceFrom_sliceFrom {s : Slice} {pos pos'} : + (s.sliceFrom pos).sliceFrom pos' = + s.sliceFrom (Slice.Pos.ofSliceFrom pos') := by + ext <;> simp [String.Pos.ext_iff, Pos.Raw.offsetBy_assoc] + +@[simp] +theorem Slice.sliceTo_sliceTo {s : Slice} {pos pos'} : + (s.sliceTo pos).sliceTo pos' = s.sliceTo (Slice.Pos.ofSliceTo pos') := by + ext <;> simp [String.Pos.ext_iff] + +end Iterate + +theorem Slice.copy_eq_copy_slice {s : Slice} {pos₁ pos₂ : s.Pos} {h} : + s.copy = (s.sliceTo pos₁).copy ++ (s.slice pos₁ pos₂ h).copy ++ (s.sliceFrom pos₂).copy := by + simp [copy_eq_copy_sliceTo (pos := pos₂), copy_eq_copy_sliceTo (pos := Slice.Pos.sliceTo _ _ h)] + +theorem copy_toByteArray_sliceTo {s : String} {pos : s.Pos} : + (s.sliceTo pos).copy.toByteArray = s.toByteArray.extract 0 pos.offset.byteIdx := by + simp [Slice.toByteArray_copy] + +theorem Slice.pos!_eq_pos {s : Slice} {p : Pos.Raw} (h : p.IsValidForSlice s) : + s.pos! p = s.pos p h := by + simp [Slice.pos!, h] + +theorem pos!_eq_pos {s : String} {p : Pos.Raw} (h : p.IsValid s) : + s.pos! p = s.pos p h := by + rw [String.pos!, Slice.pos!_eq_pos h.toSlice, String.pos] + +@[simp] +theorem Slice.copy_pos {s : Slice} {p : Pos.Raw} {h : Pos.Raw.IsValidForSlice s p} : + (s.pos p h).copy = s.copy.pos p (Pos.Raw.isValid_copy_iff.2 h) := by + simp [String.Pos.ext_iff] + +@[simp] +theorem Slice.cast_pos {s t : Slice} {p : Pos.Raw} {h : Pos.Raw.IsValidForSlice s p} {h' : s = t} : + (s.pos p h).cast h' = t.pos p (h' ▸ h) := by + simp [Pos.ext_iff] + +@[simp] +theorem cast_pos {s t : String} {p : Pos.Raw} {h : Pos.Raw.IsValid s p} {h' : s = t} : + (s.pos p h).cast h' = t.pos p (h' ▸ h) := by + simp [String.Pos.ext_iff] + end String diff --git a/src/Init/Data/String/Lemmas/IsEmpty.lean b/src/Init/Data/String/Lemmas/IsEmpty.lean index e9b08aa895..0b956a4ade 100644 --- a/src/Init/Data/String/Lemmas/IsEmpty.lean +++ b/src/Init/Data/String/Lemmas/IsEmpty.lean @@ -162,4 +162,17 @@ theorem Slice.isEmpty_slice_eq_false_iff {s : Slice} {p₁ p₂ h} : theorem length_eq_zero_iff {s : String} : s.length = 0 ↔ s = "" := by simp [← length_toList] +@[simp] +theorem toByteArray_eq_empty_iff {s : String} : + s.toByteArray = ByteArray.empty ↔ s = "" := by + simp [← toByteArray_inj] + +theorem Slice.toByteArray_copy_eq_empty_iff {s : Slice} : + s.copy.toByteArray = ByteArray.empty ↔ s.isEmpty = true := by + simp + +theorem Slice.toByteArray_copy_ne_empty_iff {s : Slice} : + s.copy.toByteArray ≠ ByteArray.empty ↔ s.isEmpty = false := by + simp + end String diff --git a/src/Init/Data/String/Lemmas/Pattern.lean b/src/Init/Data/String/Lemmas/Pattern.lean index fbea873704..021b4f10eb 100644 --- a/src/Init/Data/String/Lemmas/Pattern.lean +++ b/src/Init/Data/String/Lemmas/Pattern.lean @@ -10,3 +10,4 @@ public import Init.Data.String.Lemmas.Pattern.Basic public import Init.Data.String.Lemmas.Pattern.Memcmp public import Init.Data.String.Lemmas.Pattern.Pred public import Init.Data.String.Lemmas.Pattern.Char +public import Init.Data.String.Lemmas.Pattern.String diff --git a/src/Init/Data/String/Lemmas/Pattern/Basic.lean b/src/Init/Data/String/Lemmas/Pattern/Basic.lean index 8b3ba27cda..deaeacbce3 100644 --- a/src/Init/Data/String/Lemmas/Pattern/Basic.lean +++ b/src/Init/Data/String/Lemmas/Pattern/Basic.lean @@ -152,6 +152,11 @@ structure IsLongestMatchAt (pat : ρ) [ForwardPatternModel pat] {s : Slice} (sta le : startPos ≤ endPos isLongestMatch_sliceFrom : IsLongestMatch pat (Slice.Pos.sliceFrom _ _ le) +theorem isLongestMatchAt_iff {pat : ρ} [ForwardPatternModel pat] {s : Slice} {pos₁ pos₂ : s.Pos} : + IsLongestMatchAt pat pos₁ pos₂ ↔ + ∃ (h : pos₁ ≤ pos₂), IsLongestMatch pat (Slice.Pos.sliceFrom _ _ h) := + ⟨fun ⟨h, h'⟩ => ⟨h, h'⟩, fun ⟨h, h'⟩ => ⟨h, h'⟩⟩ + theorem IsLongestMatchAt.lt {pat : ρ} [ForwardPatternModel pat] {s : Slice} {startPos endPos : s.Pos} (h : IsLongestMatchAt pat startPos endPos) : startPos < endPos := by have := h.isLongestMatch_sliceFrom.ne_startPos @@ -175,6 +180,10 @@ Predicate stating that there is a (longest) match starting at the given position structure MatchesAt (pat : ρ) [ForwardPatternModel pat] {s : Slice} (pos : s.Pos) : Prop where exists_isMatchAt : ∃ endPos, IsLongestMatchAt pat pos endPos +theorem matchesAt_iff_exists_isLongestMatchAt {pat : ρ} [ForwardPatternModel pat] {s : Slice} + {pos : s.Pos} : MatchesAt pat pos ↔ ∃ endPos, IsLongestMatchAt pat pos endPos := + ⟨fun ⟨h⟩ => h, fun h => ⟨h⟩⟩ + theorem matchesAt_iff_exists_isLongestMatch {pat : ρ} [ForwardPatternModel pat] {s : Slice} {pos : s.Pos} : MatchesAt pat pos ↔ ∃ (endPos : s.Pos), ∃ h, IsLongestMatch pat (pos.sliceFrom endPos h) := diff --git a/src/Init/Data/String/Lemmas/Pattern/Char.lean b/src/Init/Data/String/Lemmas/Pattern/Char.lean index 5824c0b1ad..ee9a9e5ee9 100644 --- a/src/Init/Data/String/Lemmas/Pattern/Char.lean +++ b/src/Init/Data/String/Lemmas/Pattern/Char.lean @@ -11,6 +11,8 @@ public import Init.Data.String.Lemmas.Pattern.Basic import Init.Data.Option.Lemmas import Init.Data.String.Lemmas.Basic +public section + namespace String.Slice.Pattern.Char instance {c : Char} : ForwardPatternModel c where diff --git a/src/Init/Data/String/Lemmas/Pattern/Pred.lean b/src/Init/Data/String/Lemmas/Pattern/Pred.lean index d673f46207..79c204a179 100644 --- a/src/Init/Data/String/Lemmas/Pattern/Pred.lean +++ b/src/Init/Data/String/Lemmas/Pattern/Pred.lean @@ -11,6 +11,8 @@ public import Init.Data.String.Lemmas.Pattern.Basic import Init.Data.Option.Lemmas import Init.Data.String.Lemmas.Basic +public section + namespace String.Slice.Pattern.CharPred instance {p : Char → Bool} : ForwardPatternModel p where diff --git a/src/Init/Data/String/Lemmas/Pattern/String.lean b/src/Init/Data/String/Lemmas/Pattern/String.lean new file mode 100644 index 0000000000..adc2f755c8 --- /dev/null +++ b/src/Init/Data/String/Lemmas/Pattern/String.lean @@ -0,0 +1,10 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Markus Himmel +-/ +module + +prelude +public import Init.Data.String.Lemmas.Pattern.String.Basic +public import Init.Data.String.Lemmas.Pattern.String.ForwardPattern diff --git a/src/Init/Data/String/Lemmas/Pattern/String/Basic.lean b/src/Init/Data/String/Lemmas/Pattern/String/Basic.lean new file mode 100644 index 0000000000..dab3b2b70a --- /dev/null +++ b/src/Init/Data/String/Lemmas/Pattern/String/Basic.lean @@ -0,0 +1,75 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Markus Himmel +-/ +module + +prelude +public import Init.Data.String.Lemmas.Pattern.Basic +import Init.Data.String.Lemmas.IsEmpty +import Init.Data.String.Lemmas.Basic +import Init.Data.ByteArray.Lemmas +import Init.Omega + +public section + +namespace String.Slice.Pattern + +namespace ForwardSliceSearcher + +instance {pat : Slice} : ForwardPatternModel pat where + /- + See the docstring of `ForwardPatternModel` for an explanation about why we disallow matching the + empty string. + + Requiring `s ≠ ""` is a trick that allows us to give a `ForwardPatternModel` instance + unconditionally, without forcing `pat.copy` to be non-empty (which would make it very awkward + to state theorems about the instance). It does not change anything about the fact that all lemmas + about this instance require `pat.isEmpty = false`. + -/ + Matches s := s ≠ "" ∧ s = pat.copy + not_matches_empty := by simp + +instance {pat : Slice} : NoPrefixForwardPatternModel pat := + .of_length_eq (by simp +contextual [ForwardPatternModel.Matches]) + +theorem isMatch_iff {pat s : Slice} {pos : s.Pos} (h : pat.isEmpty = false) : + IsMatch pat pos ↔ (s.sliceTo pos).copy = pat.copy := by + simp only [Pattern.isMatch_iff, ForwardPatternModel.Matches, ne_eq, copy_eq_empty_iff, + Bool.not_eq_true, and_iff_right_iff_imp] + intro h' + rw [← isEmpty_copy (s := s.sliceTo pos), h', isEmpty_copy, h] + +theorem isLongestMatch_iff {pat s : Slice} {pos : s.Pos} (h : pat.isEmpty = false) : + IsLongestMatch pat pos ↔ (s.sliceTo pos).copy = pat.copy := by + rw [isLongestMatch_iff_isMatch, isMatch_iff h] + +theorem isLongestMatchAt_iff {pat s : Slice} {pos₁ pos₂ : s.Pos} (h : pat.isEmpty = false) : + IsLongestMatchAt pat pos₁ pos₂ ↔ ∃ h, (s.slice pos₁ pos₂ h).copy = pat.copy := by + simp [Pattern.isLongestMatchAt_iff, isLongestMatch_iff h] + +theorem isLongestMatchAt_iff_splits {pat s : Slice} {pos₁ pos₂ : s.Pos} (h : pat.isEmpty = false) : + IsLongestMatchAt pat pos₁ pos₂ ↔ ∃ t₁ t₂, pos₁.Splits t₁ (pat.copy ++ t₂) ∧ + pos₂.Splits (t₁ ++ pat.copy) t₂ := by + simp only [isLongestMatchAt_iff h, copy_slice_eq_iff_splits] + +theorem isLongestMatchAt_iff_extract {pat s : Slice} {pos₁ pos₂ : s.Pos} (h : pat.isEmpty = false) : + IsLongestMatchAt pat pos₁ pos₂ ↔ + s.copy.toByteArray.extract pos₁.offset.byteIdx pos₂.offset.byteIdx = pat.copy.toByteArray := by + rw [isLongestMatchAt_iff h] + refine ⟨fun ⟨h, h'⟩ => ?_, fun h' => ?_⟩ + · simp [← h', toByteArray_copy_slice] + · rw [← Slice.toByteArray_copy_ne_empty_iff, ← h', ne_eq, ByteArray.extract_eq_empty_iff] at h + exact ⟨by simp [Pos.le_iff, Pos.Raw.le_iff]; omega, + by simp [← h', ← toByteArray_inj, toByteArray_copy_slice]⟩ + +theorem matchesAt_iff_splits {pat s : Slice} {pos : s.Pos} (h : pat.isEmpty = false) : + MatchesAt pat pos ↔ ∃ t₁ t₂, pos.Splits t₁ (pat.copy ++ t₂) := by + simp only [matchesAt_iff_exists_isLongestMatchAt, isLongestMatchAt_iff_splits h] + exact ⟨fun ⟨e, t₁, t₂, ht₁, ht₂⟩ => ⟨t₁, t₂, ht₁⟩, + fun ⟨t₁, t₂, ht⟩ => ⟨ht.rotateRight, t₁, t₂, ht, ht.splits_rotateRight⟩⟩ + +end ForwardSliceSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Lemmas/Pattern/String/ForwardPattern.lean b/src/Init/Data/String/Lemmas/Pattern/String/ForwardPattern.lean new file mode 100644 index 0000000000..58e8ff37c9 --- /dev/null +++ b/src/Init/Data/String/Lemmas/Pattern/String/ForwardPattern.lean @@ -0,0 +1,70 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Markus Himmel +-/ +module + +prelude +public import Init.Data.String.Lemmas.Pattern.String.Basic +public import Init.Data.String.Pattern.String +import all Init.Data.String.Pattern.String +import Init.Data.String.Lemmas.Pattern.Memcmp +import Init.Data.String.Lemmas.Basic +import Init.Data.ByteArray.Lemmas + +namespace String.Slice.Pattern + +namespace ForwardSliceSearcher + +theorem startsWith_iff {pat s : Slice} : startsWith pat s ↔ ∃ t, s.copy = pat.copy ++ t := by + rw [startsWith] + simp [Internal.memcmpSlice_eq_true_iff, utf8ByteSize_eq_size_toByteArray_copy, -size_toByteArray] + generalize pat.copy = pat + generalize s.copy = s + refine ⟨fun ⟨h₁, h₂⟩ => ?_, ?_⟩ + · suffices pat.rawEndPos.IsValid s by + have h₁ : (s.sliceTo (s.pos _ this)).copy = pat := by + simpa [← toByteArray_inj, copy_toByteArray_sliceTo] + have := (s.pos _ this).splits + rw [h₁] at this + refine ⟨_, this.eq_append⟩ + rw [Pos.Raw.isValid_iff_isValidUTF8_extract_zero] + refine ⟨by simpa using h₁, ?_⟩ + simp only [size_toByteArray] at h₂ + simpa [h₂] using pat.isValidUTF8 + · rintro ⟨t, rfl⟩ + simp [-size_toByteArray, ByteArray.extract_append] + +theorem dropPrefix?_eq_some_iff {pat s : Slice} {pos : s.Pos} : + dropPrefix? pat s = some pos ↔ (s.sliceTo pos).copy = pat.copy := by + fun_cases dropPrefix? with + | case1 h => + simp only [offset_startPos, Pos.Raw.offsetBy_zero, Option.some.injEq] + obtain ⟨t, ht⟩ := startsWith_iff.1 h + have hval : pat.rawEndPos.IsValidForSlice s := by + rw [← Pos.Raw.isValid_copy_iff, ht, ← Slice.rawEndPos_copy] + exact Pos.Raw.isValid_rawEndPos.append_right _ + have hsp : (s.pos _ hval).Splits pat.copy t := ⟨ht, by simp⟩ + rw [pos!_eq_pos hval] + exact ⟨(· ▸ hsp.copy_sliceTo_eq), fun h => hsp.pos_eq (h ▸ pos.splits)⟩ + | case2 h => + simp only [startsWith_iff, not_exists] at h + simp only [reduceCtorEq, false_iff] + intro heq + have := h (s.sliceFrom pos).copy + simp [← heq, pos.splits.eq_append] at this + +theorem isSome_dropPrefix? {pat s : Slice} : (dropPrefix? pat s).isSome = startsWith pat s := by + fun_cases dropPrefix? <;> simp_all + +public theorem lawfulForwardPatternModel {pat : Slice} (hpat : pat.isEmpty = false) : + LawfulForwardPatternModel pat where + dropPrefixOfNonempty?_eq h := rfl + startsWith_eq s := isSome_dropPrefix?.symm + dropPrefix?_eq_some_iff pos := by + simp [ForwardPattern.dropPrefix?, dropPrefix?_eq_some_iff, isLongestMatch_iff hpat] + +end ForwardSliceSearcher + +end String.Slice.Pattern diff --git a/src/Init/Data/String/Lemmas/Splits.lean b/src/Init/Data/String/Lemmas/Splits.lean index 2a8973e26d..d638819d52 100644 --- a/src/Init/Data/String/Lemmas/Splits.lean +++ b/src/Init/Data/String/Lemmas/Splits.lean @@ -277,14 +277,30 @@ theorem sliceTo_copy_eq_iff_exists_splits {s : String} {p : s.Pos} {t₁ : Strin · rintro ⟨t₂, h⟩ exact p.splits.eq_left h -theorem Slice.Pos.Splits.pos_eq {s : Slice} {p q : s.Pos} {s t : String} (h : p.Splits s t) - (h' : q.Splits s t) : p = q := by +theorem Pos.Splits.offset_eq_decreaseBy {s : String} {p : s.Pos} (h : p.Splits t₁ t₂) : + p.offset = s.rawEndPos.decreaseBy t₂.utf8ByteSize := by + simp [h.offset_eq_rawEndPos, h.eq_append, Pos.Raw.ext_iff] + +theorem Slice.Pos.Splits.offset_eq_decreaseBy {s : Slice} {p : s.Pos} (h : p.Splits t₁ t₂) : + p.offset = s.rawEndPos.decreaseBy t₂.utf8ByteSize := by + simp [← offset_copy, (splits_copy_iff.2 h).offset_eq_decreaseBy] + +theorem Slice.Pos.Splits.pos_eq {s : Slice} {p q : s.Pos} {s t t' : String} (h : p.Splits s t) + (h' : q.Splits s t') : p = q := by rw [Slice.Pos.ext_iff, h.offset_eq_rawEndPos, h'.offset_eq_rawEndPos] -theorem Pos.Splits.pos_eq {s : String} {p q : s.Pos} {s t : String} (h : p.Splits s t) - (h' : q.Splits s t) : p = q := by +theorem Slice.Pos.Splits.pos_eq_of_eq_right {s : Slice} {p q : s.Pos} {s s' t : String} + (h : p.Splits s t) (h' : q.Splits s' t) : p = q := by + rw [Slice.Pos.ext_iff, h.offset_eq_decreaseBy, h'.offset_eq_decreaseBy] + +theorem Pos.Splits.pos_eq {s : String} {p q : s.Pos} {s t t' : String} (h : p.Splits s t) + (h' : q.Splits s t') : p = q := by rw [Pos.ext_iff, h.offset_eq_rawEndPos, h'.offset_eq_rawEndPos] +theorem Pos.Splits.pos_eq_of_eq_right {s : String} {p q : s.Pos} {s s' t : String} + (h : p.Splits s t) (h' : q.Splits s' t) : p = q := by + rw [Pos.ext_iff, h.offset_eq_decreaseBy, h'.offset_eq_decreaseBy] + theorem Slice.Pos.Splits.get_eq_of_singleton {s : Slice} {p : s.Pos} {h : p ≠ s.endPos} {t₁ t₂ : String} {c : Char} (hs : (p.next h).Splits (t₁ ++ singleton c) t₂) : p.get h = c := by have := hs.eq_left (splits_next p h) @@ -405,4 +421,100 @@ theorem Pos.Splits.lt_iff_exists_eq_append {s : String} {p₁ p₂ : s.Pos} {t p₁ < p₂ ↔ ∃ t₅, t₅ ≠ "" ∧ t₃ = t₁ ++ t₅ ∧ t₂ = t₅ ++ t₄ := by rw [← toSlice_lt, (splits_toSlice_iff.2 h).lt_iff_exists_eq_append (splits_toSlice_iff.2 h')] +@[simp] +theorem Slice.Pos.splits_ofToSlice_iff {s : String} {p : s.toSlice.Pos} {t₁ t₂ : String} : + (Pos.ofToSlice p).Splits t₁ t₂ ↔ p.Splits t₁ t₂ := by + simp [← Pos.splits_toSlice_iff] + +/-- +Given a position that splits `s` into `t₁` and `t₂ ++ t₃` with witness `h`, constructs a position +`h.rotateRight` that splits `s` into `t₁ ++ t₂` and `t₃`. Use `h.splits_rotateRight` for the witness +that `h.rotateRight` splits `s` as required. +-/ +@[inline] +def Slice.Pos.Splits.rotateRight {s : Slice} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits t₁ (t₂ ++ t₃)) : s.Pos := + s.pos (p.offset.increaseBy t₂.utf8ByteSize) + (Pos.Raw.isValidForSlice_iff_exists_append.2 + ⟨t₁ ++ t₂, t₃, by simpa [append_assoc] using h.eq_append, + by simp [Pos.Raw.ext_iff, h.offset_eq_rawEndPos]⟩) + +theorem Slice.Pos.Splits.splits_rotateRight {s : Slice} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits t₁ (t₂ ++ t₃)) : h.rotateRight.Splits (t₁ ++ t₂) t₃ where + eq_append := by simpa [append_assoc] using h.eq_append + offset_eq_rawEndPos := by simp [rotateRight, Pos.Raw.ext_iff, h.offset_eq_rawEndPos] + +/-- +Given a position that splits `s` into `t₁ ++ t₂` and `t₃` with witness `h`, construct a position +`h.rotateLeft` that splits `s` into `t₁` and `t₂ ++ t₃`. Use `h.splits_rotateLeft` for the witness +that `h.rotateLEft` splits `s` as required. +-/ +@[inline] +def Slice.Pos.Splits.rotateLeft {s : Slice} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits (t₁ ++ t₂) t₃) : s.Pos := + s.pos t₁.rawEndPos (Pos.Raw.isValidForSlice_iff_exists_append.2 ⟨t₁, t₂ ++ t₃, + by simpa [append_assoc] using h.eq_append, rfl⟩) + +theorem Slice.Pos.Splits.splits_rotateLeft {s : Slice} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits (t₁ ++ t₂) t₃) : h.rotateLeft.Splits t₁ (t₂ ++ t₃) where + eq_append := by simpa [append_assoc] using h.eq_append + offset_eq_rawEndPos := rfl + +@[inline, inherit_doc Slice.Pos.Splits.rotateRight] +def Pos.Splits.rotateRight {s : String} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits t₁ (t₂ ++ t₃)) : s.Pos := + String.Pos.ofToSlice (splits_toSlice_iff.2 h).rotateRight + +theorem Pos.Splits.splits_rotateRight {s : String} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits t₁ (t₂ ++ t₃)) : h.rotateRight.Splits (t₁ ++ t₂) t₃ := by + simpa [Pos.Splits.rotateRight] using Slice.Pos.Splits.splits_rotateRight _ + +@[inline, inherit_doc Slice.Pos.Splits.rotateLeft] +def Pos.Splits.rotateLeft {s : String} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits (t₁ ++ t₂) t₃) : s.Pos := + String.Pos.ofToSlice (splits_toSlice_iff.2 h).rotateLeft + +theorem Pos.Splits.splits_rotateLeft {s : String} {p : s.Pos} {t₁ t₂ t₃ : String} + (h : p.Splits (t₁ ++ t₂) t₃) : h.rotateLeft.Splits t₁ (t₂ ++ t₃) := by + simpa [Pos.Splits.rotateLeft] using Slice.Pos.Splits.splits_rotateLeft _ + +theorem Slice.copy_slice_eq_iff_splits {s : Slice} {pos₁ pos₂ : s.Pos} : + (∃ h, (s.slice pos₁ pos₂ h).copy = t) ↔ + ∃ t₁ t₂, pos₁.Splits t₁ (t ++ t₂) ∧ pos₂.Splits (t₁ ++ t) t₂ := by + refine ⟨?_, fun ⟨t₁, t₂, ht₁, ht₂⟩ => ?_⟩ + · rintro ⟨h, rfl⟩ + refine ⟨(s.sliceTo pos₁).copy, (s.sliceFrom pos₂).copy, + ⟨by simpa [append_assoc] using copy_eq_copy_slice, by simp⟩, ⟨copy_eq_copy_slice, ?_⟩⟩ + simp [Pos.Raw.ext_iff, Slice.Pos.le_iff, Pos.Raw.le_iff, Pos.Raw.byteDistance_eq] at ⊢ h + omega + · have h : pos₁ ≤ pos₂ := (ht₁.le_iff_exists_eq_append ht₂).2 ⟨t, rfl, rfl⟩ + exact ⟨h, by simpa [ht₂.eq_append, ht₁.eq_left pos₁.splits, ht₂.eq_right pos₂.splits] using + (copy_eq_copy_slice (h := h)).symm⟩ + +theorem copy_slice_eq_iff_splits {s : String} {pos₁ pos₂ : s.Pos} : + (∃ h, (s.slice pos₁ pos₂ h).copy = t) ↔ + ∃ t₁ t₂, pos₁.Splits t₁ (t ++ t₂) ∧ pos₂.Splits (t₁ ++ t) t₂ := by + simp [← Pos.splits_toSlice_iff, ← Slice.copy_slice_eq_iff_splits] + +theorem Pos.splits_append_rawEndPos {s t : String} : + ((s ++ t).pos s.rawEndPos ((Pos.Raw.isValid_rawEndPos).append_right t)).Splits s t where + eq_append := rfl + offset_eq_rawEndPos := rfl + +theorem Pos.Splits.copy_sliceTo_eq {s : String} {p : s.Pos} (h : p.Splits t₁ t₂) : + (s.sliceTo p).copy = t₁ := + p.splits.eq_left h + +theorem Pos.Splits.copy_sliceFrom_eq {s : String} {p : s.Pos} (h : p.Splits t₁ t₂) : + (s.sliceFrom p).copy = t₂ := + p.splits.eq_right h + +theorem Slice.Pos.Splits.copy_sliceTo_eq {s : Slice} {p : s.Pos} (h : p.Splits t₁ t₂) : + (s.sliceTo p).copy = t₁ := + p.splits.eq_left h + +theorem Slice.Pos.Splits.copy_sliceFrom_eq {s : Slice} {p : s.Pos} (h : p.Splits t₁ t₂) : + (s.sliceFrom p).copy = t₂ := + p.splits.eq_right h + end String