perf: improve auto completion and fuzzy matching (#11630)

This PR improves the performance of autocompletion and fuzzy matching by
introducing an ASCII fast path into one of their core loops and making
Char.toLower/toUpper more efficient.

Co-authored-by: Rob23oba <152706811+Rob23oba@users.noreply.github.com>
This commit is contained in:
Henrik Böving 2025-12-17 17:04:05 +01:00 committed by GitHub
parent 1918d4f0dc
commit f21f8d96f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 73 additions and 27 deletions

View file

@ -102,7 +102,7 @@ Returns `true` if the character is a uppercase ASCII letter.
The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
-/
@[inline] def isUpper (c : Char) : Bool :=
c.val ≥ 65 && c.val ≤ 90
c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val
/--
Returns `true` if the character is a lowercase ASCII letter.
@ -110,7 +110,7 @@ Returns `true` if the character is a lowercase ASCII letter.
The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
-/
@[inline] def isLower (c : Char) : Bool :=
c.val ≥ 97 && c.val ≤ 122
c.val ≥ 'a'.val && c.val ≤ 'z'.val
/--
Returns `true` if the character is an ASCII letter.
@ -126,7 +126,7 @@ Returns `true` if the character is an ASCII digit.
The ASCII digits are the following: `0123456789`.
-/
@[inline] def isDigit (c : Char) : Bool :=
c.val ≥ 48 && c.val ≤ 57
c.val ≥ '0'.val && c.val ≤ '9'.val
/--
Returns `true` if the character is an ASCII letter or digit.
@ -143,9 +143,16 @@ alphabet are returned unchanged.
The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
-/
@[inline]
def toLower (c : Char) : Char :=
let n := toNat c;
if n >= 65 ∧ n <= 90 then ofNat (n + 32) else c
if h : c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val then
⟨c.val + ('a'.val - 'A'.val), ?_⟩
else
c
where finally
have h : c.val.toBitVec.toNat + ('a'.val - 'A'.val).toBitVec.toNat < 0xd800 :=
Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
exact .inl (lt_of_eq_of_lt (Nat.mod_eq_of_lt (Nat.lt_trans h (by decide))) h)
/--
Converts a lowercase ASCII letter to the corresponding uppercase letter. Letters outside the ASCII
@ -153,8 +160,20 @@ alphabet are returned unchanged.
The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
-/
@[inline]
def toUpper (c : Char) : Char :=
let n := toNat c;
if n >= 97 ∧ n <= 122 then ofNat (n - 32) else c
if h : c.val ≥ 'a'.val ∧ c.val ≤ 'z'.val then
⟨c.val + ('A'.val - 'a'.val), ?_⟩
else
c
where finally
have h₁ : 2^32 ≤ c.val.toNat + ('A'.val - 'a'.val).toNat :=
@Nat.add_le_add 'a'.val.toNat _ (2^32 - 'a'.val.toNat) _ h.1 (by decide)
have h₂ : c.val.toBitVec.toNat + ('A'.val - 'a'.val).toNat < 2^32 + 0xd800 :=
Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
have add_eq {x y : UInt32} : (x + y).toNat = (x.toNat + y.toNat) % 2^32 := rfl
replace h₂ := Nat.sub_lt_left_of_lt_add h₁ h₂
exact .inl <| lt_of_eq_of_lt (add_eq.trans (Nat.mod_eq_sub_mod h₁) |>.trans
(Nat.mod_eq_of_lt (Nat.lt_trans h₂ (by decide)))) h₂
end Char

View file

@ -405,22 +405,6 @@ instance : Min UInt16 := minOfLe
/-- Converts an `Int` to a `UInt32` by taking the (non-negative remainder of the division by `2 ^ 32`. -/
def UInt32.ofInt (x : Int) : UInt32 := ofNat (x % 2 ^ 32).toNat
/--
Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
operator.
This function is overridden at runtime with an efficient implementation.
-/
@[extern "lean_uint32_add"]
protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
/--
Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
via the `-` operator.
This function is overridden at runtime with an efficient implementation.
-/
@[extern "lean_uint32_sub"]
protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
/--
Multiplies two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `*`
operator.
@ -526,8 +510,6 @@ natural numbers. Usually accessed via the `≤` operator.
-/
@[expose] protected def UInt32.le (a b : UInt32) : Prop := a.toBitVec ≤ b.toBitVec
instance : Add UInt32 := ⟨UInt32.add⟩
instance : Sub UInt32 := ⟨UInt32.sub⟩
instance : Mul UInt32 := ⟨UInt32.mul⟩
instance : Pow UInt32 Nat := ⟨UInt32.pow⟩
instance : Mod UInt32 := ⟨UInt32.mod⟩

View file

@ -207,6 +207,28 @@ theorem UInt32.lt_ofNatLT_of_lt {n m : Nat} (h1 : n < UInt32.size) (h2 : m < UIn
simp only [(· < ·), BitVec.toNat, ofNatLT, BitVec.ofNatLT, ofNat, BitVec.ofNat, Fin.Internal.ofNat_eq_ofNat,
Fin.ofNat, Nat.mod_eq_of_lt h2, imp_self]
/--
Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
operator.
This function is overridden at runtime with an efficient implementation.
-/
@[extern "lean_uint32_add"]
protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
/--
Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
via the `-` operator.
This function is overridden at runtime with an efficient implementation.
-/
@[extern "lean_uint32_sub"]
protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
instance : Add UInt32 := ⟨UInt32.add⟩
instance : Sub UInt32 := ⟨UInt32.sub⟩
/-- Converts a `UInt64` into the corresponding `Fin UInt64.size`. -/
def UInt64.toFin (x : UInt64) : Fin UInt64.size := x.toBitVec.toFin

View file

@ -18,6 +18,7 @@ public import Init.Data.Option.Coe
public import Init.Data.Range
import Init.Data.SInt.Basic
import Init.Data.String.Basic
import Lean.Server.Completion.CompletionUtils
public section
@ -271,7 +272,7 @@ def fuzzyMatchScore? (pattern word : String) : Option Float := Id.run do
return some 1
if pattern.length > word.length then
return none
if !(containsInOrderLower pattern word) then
if !(pattern.charactersIn word) then
return none
let some score := fuzzyMatchCore pattern word (stringInfo pattern) (stringInfo word)

View file

@ -11,8 +11,30 @@ public import Lean.Meta.WHNF
public section
partial def String.charactersIn (a b : String) : Bool :=
go ⟨0⟩ ⟨0⟩
goFastScalar ⟨0⟩ ⟨0⟩
where
/-
This function is the ASCII fast path for `go`
-/
goFastScalar (aPos bPos : String.Pos.Raw) : Bool :=
if ha : ¬aPos < a.rawEndPos then
true
else if hb : ¬bPos < b.rawEndPos then
false
else
let aByte := a.getUTF8Byte aPos (by simpa using ha)
let bByte := b.getUTF8Byte bPos (by simpa using hb)
-- If a or b are not UTF-8 bytes we give up on the fast path
if (aByte &&& 0x80 != 0) || (bByte &&& 0x80 != 0) then
go aPos bPos
else
let bPos := ⟨bPos.byteIdx + 1⟩
if aByte.toAsciiLower == bByte.toAsciiLower then
let aPos := ⟨aPos.byteIdx + 1⟩
goFastScalar aPos bPos
else
goFastScalar aPos bPos
go (aPos bPos : String.Pos.Raw) : Bool :=
if ha : aPos.atEnd a then
true