perf: less allocations when using string patterns (#11255)
This PR reduces the allocations when using string patterns. In particular `startsWith`, `dropPrefix?`, `endsWith`, `dropSuffix?` are optimized.
This commit is contained in:
parent
75570f327f
commit
52b687cab4
8 changed files with 41 additions and 29 deletions
|
|
@ -435,7 +435,7 @@ theorem endExclusive_toSlice {s : String} : s.toSlice.endExclusive = s.endValidP
|
|||
theorem str_toSlice {s : String} : s.toSlice.str = s := rfl
|
||||
|
||||
/-- The number of bytes of the UTF-8 encoding of the string slice. -/
|
||||
@[expose]
|
||||
@[expose, inline]
|
||||
def Slice.utf8ByteSize (s : Slice) : Nat :=
|
||||
s.startInclusive.offset.byteDistance s.endExclusive.offset
|
||||
|
||||
|
|
@ -464,7 +464,7 @@ theorem Pos.Raw.byteIdx_sub_slice {p : Pos.Raw} {s : Slice} :
|
|||
(p - s).byteIdx = p.byteIdx - s.utf8ByteSize := rfl
|
||||
|
||||
/-- The end position of a slice, as a `Pos.Raw`. -/
|
||||
@[expose]
|
||||
@[expose, inline]
|
||||
def Slice.rawEndPos (s : Slice) : Pos.Raw where
|
||||
byteIdx := s.utf8ByteSize
|
||||
|
||||
|
|
|
|||
|
|
@ -74,8 +74,8 @@ class ForwardPattern (ρ : Type) where
|
|||
|
||||
namespace Internal
|
||||
|
||||
@[extern "lean_slice_memcmp"]
|
||||
def memcmp (lhs rhs : @& Slice) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
|
||||
@[extern "lean_string_memcmp"]
|
||||
def memcmpStr (lhs rhs : @& String) (lstart : @& String.Pos.Raw) (rstart : @& String.Pos.Raw)
|
||||
(len : @& String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
|
||||
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
|
||||
go 0
|
||||
|
|
@ -99,6 +99,27 @@ where
|
|||
simp [Pos.Raw.lt_iff] at h ⊢
|
||||
omega
|
||||
|
||||
@[inline]
|
||||
def memcmpSlice (lhs rhs : Slice) (lstart : String.Pos.Raw) (rstart : String.Pos.Raw)
|
||||
(len : String.Pos.Raw) (h1 : len.offsetBy lstart ≤ lhs.rawEndPos)
|
||||
(h2 : len.offsetBy rstart ≤ rhs.rawEndPos) : Bool :=
|
||||
memcmpStr
|
||||
lhs.str
|
||||
rhs.str
|
||||
(lstart.offsetBy lhs.startInclusive.offset)
|
||||
(rstart.offsetBy rhs.startInclusive.offset)
|
||||
len
|
||||
(by
|
||||
have := lhs.startInclusive_le_endExclusive
|
||||
have := lhs.endExclusive.isValid.le_utf8ByteSize
|
||||
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
|
||||
omega)
|
||||
(by
|
||||
have := rhs.startInclusive_le_endExclusive
|
||||
have := rhs.endExclusive.isValid.le_utf8ByteSize
|
||||
simp [ValidPos.le_iff, Pos.Raw.le_iff, Slice.utf8ByteSize_eq] at *
|
||||
omega)
|
||||
|
||||
end Internal
|
||||
|
||||
namespace ForwardPattern
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ def startsWith (s : Slice) (pat : Slice) : Bool :=
|
|||
omega
|
||||
have hp := by
|
||||
simp [Pos.Raw.le_iff]
|
||||
Internal.memcmp s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
|
||||
Internal.memcmpSlice s pat s.startPos.offset pat.startPos.offset pat.rawEndPos hs hp
|
||||
else
|
||||
false
|
||||
|
||||
|
|
@ -306,7 +306,7 @@ def endsWith (s : Slice) (pat : Slice) : Bool :=
|
|||
omega
|
||||
have hp := by
|
||||
simp [patStart, Pos.Raw.le_iff] at h ⊢
|
||||
Internal.memcmp s pat sStart patStart pat.rawEndPos hs hp
|
||||
Internal.memcmpSlice s pat sStart patStart pat.rawEndPos hs hp
|
||||
else
|
||||
false
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ def beq (s1 s2 : Slice) : Bool :=
|
|||
if h : s1.utf8ByteSize = s2.utf8ByteSize then
|
||||
have h1 := by simp [h, String.Pos.Raw.le_iff]
|
||||
have h2 := by simp [h, String.Pos.Raw.le_iff]
|
||||
Internal.memcmp s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
|
||||
Internal.memcmpSlice s1 s2 s1.startPos.offset s2.startPos.offset s1.rawEndPos h1 h2
|
||||
else
|
||||
false
|
||||
|
||||
|
|
|
|||
|
|
@ -1163,7 +1163,7 @@ static inline uint8_t lean_string_dec_eq(b_lean_obj_arg s1, b_lean_obj_arg s2) {
|
|||
static inline uint8_t lean_string_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2) { return lean_string_lt(s1, s2); }
|
||||
LEAN_EXPORT uint64_t lean_string_hash(b_lean_obj_arg);
|
||||
LEAN_EXPORT lean_obj_res lean_string_of_usize(size_t);
|
||||
LEAN_EXPORT uint8_t lean_slice_memcmp(b_lean_obj_arg s1, b_lean_obj_arg s2, b_lean_obj_arg lstart, b_lean_obj_arg rstart, b_lean_obj_arg len);
|
||||
LEAN_EXPORT uint8_t lean_string_memcmp(b_lean_obj_arg s1, b_lean_obj_arg s2, b_lean_obj_arg lstart, b_lean_obj_arg rstart, b_lean_obj_arg len);
|
||||
LEAN_EXPORT uint64_t lean_slice_hash(b_lean_obj_arg);
|
||||
LEAN_EXPORT uint8_t lean_slice_dec_lt(b_lean_obj_arg s1, b_lean_obj_arg s2);
|
||||
|
||||
|
|
|
|||
|
|
@ -82,13 +82,4 @@ where
|
|||
removeExts s i' e
|
||||
termination_by i.1
|
||||
|
||||
-- sanity check
|
||||
example :
|
||||
modOfFilePath "Foo/Bar" = `Foo.Bar
|
||||
∧ modOfFilePath "Foo/Bar/" = `Foo.Bar
|
||||
∧ modOfFilePath "Foo/Bar.lean" = `Foo.Bar
|
||||
∧ modOfFilePath "Foo/Bar.tar.gz" = `Foo.Bar
|
||||
∧ modOfFilePath "Foo/Bar.lean/" = `Foo.«Bar.lean»
|
||||
:= by native_decide
|
||||
|
||||
attribute [deprecated "Deprecated without replacement." (since := "2025-08-01")] modOfFilePath
|
||||
|
|
|
|||
|
|
@ -2362,6 +2362,17 @@ extern "C" LEAN_EXPORT obj_res lean_string_of_usize(size_t n) {
|
|||
return mk_ascii_string_unchecked(std::to_string(n));
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT uint8_t lean_string_memcmp(b_obj_arg s1, b_obj_arg s2, b_obj_arg lstart, b_obj_arg rstart, b_obj_arg len) {
|
||||
// Thanks to the proof arguments we know that lstart, rstart and len are all scalars.
|
||||
lean_assert(lean_is_scalar(lstart));
|
||||
lean_assert(lean_is_scalar(rstart));
|
||||
lean_assert(lean_is_scalar(len));
|
||||
|
||||
char const * lbase = lean_string_cstr(s1) + lean_unbox(lstart);
|
||||
char const * rbase = lean_string_cstr(s2) + lean_unbox(rstart);
|
||||
return std::memcmp(lbase, rbase, lean_unbox(len)) == 0;
|
||||
}
|
||||
|
||||
size_t lean_slice_size(b_obj_arg slice) {
|
||||
b_obj_res start = lean_ctor_get(slice, 1);
|
||||
lean_assert(lean_is_scalar(start));
|
||||
|
|
@ -2377,17 +2388,6 @@ char const * lean_slice_base(b_obj_arg slice) {
|
|||
return lean_string_cstr(string) + lean_unbox(offset);
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT uint8_t lean_slice_memcmp(b_obj_arg s1, b_obj_arg s2, b_obj_arg lstart, b_obj_arg rstart, b_obj_arg len) {
|
||||
// Thanks to the proof arguments we know that lstart, rstart and len are all scalars.
|
||||
lean_assert(lean_is_scalar(lstart));
|
||||
lean_assert(lean_is_scalar(rstart));
|
||||
lean_assert(lean_is_scalar(len));
|
||||
|
||||
char const * lbase = lean_slice_base(s1) + lean_unbox(lstart);
|
||||
char const * rbase = lean_slice_base(s2) + lean_unbox(rstart);
|
||||
return std::memcmp(lbase, rbase, lean_unbox(len)) == 0;
|
||||
}
|
||||
|
||||
extern "C" LEAN_EXPORT uint64_t lean_slice_hash(b_obj_arg s) {
|
||||
size_t sz = lean_slice_size(s);
|
||||
char const * str = lean_slice_base(s);
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ options get_default_options() {
|
|||
opts = opts.update({"debug", "terminalTacticsAsSorry"}, false);
|
||||
// switch to `true` for ABI-breaking changes affecting meta code;
|
||||
// see also next option!
|
||||
opts = opts.update({"interpreter", "prefer_native"}, false);
|
||||
opts = opts.update({"interpreter", "prefer_native"}, true);
|
||||
// switch to `false` when enabling `prefer_native` should also affect use
|
||||
// of built-in parsers in quotations; this is usually the case, but setting
|
||||
// both to `true` may be necessary for handling non-builtin parsers with
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue