perf: improve auto completion and fuzzy matching (#11630)

This PR improves the performance of autocompletion and fuzzy matching by introducing an ASCII fast path into one of their core loops and making Char.toLower/toUpper more efficient. Co-authored-by: Rob23oba <152706811+Rob23oba@users.noreply.github.com>
2025-12-17 17:04:05 +01:00 · 2025-12-17 17:04:05 +01:00 · f21f8d96f9
commit f21f8d96f9
parent 1918d4f0dc
5 changed files with 73 additions and 27 deletions
--- a/src/Init/Data/Char/Basic.lean
+++ b/src/Init/Data/Char/Basic.lean
@ -102,7 +102,7 @@ Returns `true` if the character is a uppercase ASCII letter.
 The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
 -/
@[inline] def isUpper (c : Char) : Bool :=
-  c.val ≥ 65 && c.val ≤ 90
+  c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val

 /--
 Returns `true` if the character is a lowercase ASCII letter.
@ -110,7 +110,7 @@ Returns `true` if the character is a lowercase ASCII letter.
 The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
 -/
@[inline] def isLower (c : Char) : Bool :=
-  c.val ≥ 97 && c.val ≤ 122
+  c.val ≥ 'a'.val && c.val ≤ 'z'.val

 /--
 Returns `true` if the character is an ASCII letter.
@ -126,7 +126,7 @@ Returns `true` if the character is an ASCII digit.
 The ASCII digits are the following: `0123456789`.
 -/
@[inline] def isDigit (c : Char) : Bool :=
-  c.val ≥ 48 && c.val ≤ 57
+  c.val ≥ '0'.val && c.val ≤ '9'.val

 /--
 Returns `true` if the character is an ASCII letter or digit.
@ -143,9 +143,16 @@ alphabet are returned unchanged.

 The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
 -/
+@[inline]
 def toLower (c : Char) : Char :=
-  let n := toNat c;
-  if n >= 65 ∧ n <= 90 then ofNat (n + 32) else c
+  if h : c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val then
+    ⟨c.val + ('a'.val - 'A'.val), ?_⟩
+  else
+    c
+where finally
+  have h : c.val.toBitVec.toNat + ('a'.val - 'A'.val).toBitVec.toNat < 0xd800 :=
+    Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
+  exact .inl (lt_of_eq_of_lt (Nat.mod_eq_of_lt (Nat.lt_trans h (by decide))) h)

 /--
 Converts a lowercase ASCII letter to the corresponding uppercase letter. Letters outside the ASCII
@ -153,8 +160,20 @@ alphabet are returned unchanged.

 The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
 -/
+@[inline]
 def toUpper (c : Char) : Char :=
-  let n := toNat c;
-  if n >= 97 ∧ n <= 122 then ofNat (n - 32) else c
+  if h : c.val ≥ 'a'.val ∧ c.val ≤ 'z'.val then
+    ⟨c.val + ('A'.val - 'a'.val), ?_⟩
+  else
+    c
+where finally
+  have h₁ : 2^32 ≤ c.val.toNat + ('A'.val - 'a'.val).toNat :=
+    @Nat.add_le_add 'a'.val.toNat _ (2^32 - 'a'.val.toNat) _ h.1 (by decide)
+  have h₂ : c.val.toBitVec.toNat + ('A'.val - 'a'.val).toNat < 2^32 + 0xd800 :=
+    Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
+  have add_eq {x y : UInt32} : (x + y).toNat = (x.toNat + y.toNat) % 2^32 := rfl
+  replace h₂ := Nat.sub_lt_left_of_lt_add h₁ h₂
+  exact .inl <| lt_of_eq_of_lt (add_eq.trans (Nat.mod_eq_sub_mod h₁) |>.trans
+    (Nat.mod_eq_of_lt (Nat.lt_trans h₂ (by decide)))) h₂

 end Char
--- a/src/Init/Data/UInt/Basic.lean
+++ b/src/Init/Data/UInt/Basic.lean
@ -405,22 +405,6 @@ instance : Min UInt16 := minOfLe
 /-- Converts an `Int` to a `UInt32` by taking the (non-negative remainder of the division by `2 ^ 32`. -/
 def UInt32.ofInt (x : Int) : UInt32 := ofNat (x % 2 ^ 32).toNat

-/--
-Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
-operator.
-
-This function is overridden at runtime with an efficient implementation.
-/
-@[extern "lean_uint32_add"]
-protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
-/--
-Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
-via the `-` operator.
-
-This function is overridden at runtime with an efficient implementation.
-/
-@[extern "lean_uint32_sub"]
-protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
 /--
 Multiplies two 32-bit unsigned integers, wrapping around on overflow.  Usually accessed via the `*`
 operator.
@ -526,8 +510,6 @@ natural numbers. Usually accessed via the `≤` operator.
 -/
@[expose] protected def UInt32.le (a b : UInt32) : Prop := a.toBitVec ≤ b.toBitVec

-instance : Add UInt32       := ⟨UInt32.add⟩
-instance : Sub UInt32       := ⟨UInt32.sub⟩
 instance : Mul UInt32       := ⟨UInt32.mul⟩
 instance : Pow UInt32 Nat   := ⟨UInt32.pow⟩
 instance : Mod UInt32       := ⟨UInt32.mod⟩
--- a/src/Init/Data/UInt/BasicAux.lean
+++ b/src/Init/Data/UInt/BasicAux.lean
@ -207,6 +207,28 @@ theorem UInt32.lt_ofNatLT_of_lt {n m : Nat} (h1 : n < UInt32.size) (h2 : m < UIn
  simp only [(· < ·), BitVec.toNat, ofNatLT, BitVec.ofNatLT, ofNat, BitVec.ofNat, Fin.Internal.ofNat_eq_ofNat,
    Fin.ofNat, Nat.mod_eq_of_lt h2, imp_self]

+
+/--
+Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
+operator.
+
+This function is overridden at runtime with an efficient implementation.
+-/
+@[extern "lean_uint32_add"]
+protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
+
+/--
+Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
+via the `-` operator.
+
+This function is overridden at runtime with an efficient implementation.
+-/
+@[extern "lean_uint32_sub"]
+protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
+
+instance : Add UInt32       := ⟨UInt32.add⟩
+instance : Sub UInt32       := ⟨UInt32.sub⟩
+
 /-- Converts a `UInt64` into the corresponding `Fin UInt64.size`. -/
 def UInt64.toFin (x : UInt64) : Fin UInt64.size := x.toBitVec.toFin

--- a/src/Lean/Data/FuzzyMatching.lean
+++ b/src/Lean/Data/FuzzyMatching.lean
@ -18,6 +18,7 @@ public import Init.Data.Option.Coe
 public import Init.Data.Range
 import Init.Data.SInt.Basic
 import Init.Data.String.Basic
+import Lean.Server.Completion.CompletionUtils

 public section

@ -271,7 +272,7 @@ def fuzzyMatchScore? (pattern word : String) : Option Float := Id.run do
    return some 1
  if pattern.length > word.length then
    return none
-  if !(containsInOrderLower pattern word) then
+  if !(pattern.charactersIn word) then
    return none

  let some score := fuzzyMatchCore pattern word (stringInfo pattern) (stringInfo word)
--- a/src/Lean/Server/Completion/CompletionUtils.lean
+++ b/src/Lean/Server/Completion/CompletionUtils.lean
@ -11,8 +11,30 @@ public import Lean.Meta.WHNF
 public section

 partial def String.charactersIn (a b : String) : Bool :=
-  go ⟨0⟩ ⟨0⟩
+  goFastScalar ⟨0⟩ ⟨0⟩
 where
+  /-
+  This function is the ASCII fast path for `go`
+  -/
+  goFastScalar (aPos bPos : String.Pos.Raw) : Bool :=
+    if ha : ¬aPos < a.rawEndPos then
+      true
+    else if hb : ¬bPos < b.rawEndPos then
+      false
+    else
+      let aByte := a.getUTF8Byte aPos (by simpa using ha)
+      let bByte := b.getUTF8Byte bPos (by simpa using hb)
+      -- If a or b are not UTF-8 bytes we give up on the fast path
+      if (aByte &&& 0x80 != 0) || (bByte &&& 0x80 != 0) then
+        go aPos bPos
+      else
+        let bPos := ⟨bPos.byteIdx + 1⟩
+        if aByte.toAsciiLower == bByte.toAsciiLower then
+          let aPos := ⟨aPos.byteIdx + 1⟩
+          goFastScalar aPos bPos
+        else
+          goFastScalar aPos bPos
+
  go (aPos bPos : String.Pos.Raw) : Bool :=
    if ha : aPos.atEnd a then
      true