perf: improve auto completion and fuzzy matching (#11630)
This PR improves the performance of autocompletion and fuzzy matching by introducing an ASCII fast path into one of their core loops and making Char.toLower/toUpper more efficient. Co-authored-by: Rob23oba <152706811+Rob23oba@users.noreply.github.com>
This commit is contained in:
parent
1918d4f0dc
commit
f21f8d96f9
5 changed files with 73 additions and 27 deletions
|
|
@ -102,7 +102,7 @@ Returns `true` if the character is a uppercase ASCII letter.
|
|||
The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
|
||||
-/
|
||||
@[inline] def isUpper (c : Char) : Bool :=
|
||||
c.val ≥ 65 && c.val ≤ 90
|
||||
c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val
|
||||
|
||||
/--
|
||||
Returns `true` if the character is a lowercase ASCII letter.
|
||||
|
|
@ -110,7 +110,7 @@ Returns `true` if the character is a lowercase ASCII letter.
|
|||
The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
|
||||
-/
|
||||
@[inline] def isLower (c : Char) : Bool :=
|
||||
c.val ≥ 97 && c.val ≤ 122
|
||||
c.val ≥ 'a'.val && c.val ≤ 'z'.val
|
||||
|
||||
/--
|
||||
Returns `true` if the character is an ASCII letter.
|
||||
|
|
@ -126,7 +126,7 @@ Returns `true` if the character is an ASCII digit.
|
|||
The ASCII digits are the following: `0123456789`.
|
||||
-/
|
||||
@[inline] def isDigit (c : Char) : Bool :=
|
||||
c.val ≥ 48 && c.val ≤ 57
|
||||
c.val ≥ '0'.val && c.val ≤ '9'.val
|
||||
|
||||
/--
|
||||
Returns `true` if the character is an ASCII letter or digit.
|
||||
|
|
@ -143,9 +143,16 @@ alphabet are returned unchanged.
|
|||
|
||||
The uppercase ASCII letters are the following: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`.
|
||||
-/
|
||||
@[inline]
|
||||
def toLower (c : Char) : Char :=
|
||||
let n := toNat c;
|
||||
if n >= 65 ∧ n <= 90 then ofNat (n + 32) else c
|
||||
if h : c.val ≥ 'A'.val ∧ c.val ≤ 'Z'.val then
|
||||
⟨c.val + ('a'.val - 'A'.val), ?_⟩
|
||||
else
|
||||
c
|
||||
where finally
|
||||
have h : c.val.toBitVec.toNat + ('a'.val - 'A'.val).toBitVec.toNat < 0xd800 :=
|
||||
Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
|
||||
exact .inl (lt_of_eq_of_lt (Nat.mod_eq_of_lt (Nat.lt_trans h (by decide))) h)
|
||||
|
||||
/--
|
||||
Converts a lowercase ASCII letter to the corresponding uppercase letter. Letters outside the ASCII
|
||||
|
|
@ -153,8 +160,20 @@ alphabet are returned unchanged.
|
|||
|
||||
The lowercase ASCII letters are the following: `abcdefghijklmnopqrstuvwxyz`.
|
||||
-/
|
||||
@[inline]
|
||||
def toUpper (c : Char) : Char :=
|
||||
let n := toNat c;
|
||||
if n >= 97 ∧ n <= 122 then ofNat (n - 32) else c
|
||||
if h : c.val ≥ 'a'.val ∧ c.val ≤ 'z'.val then
|
||||
⟨c.val + ('A'.val - 'a'.val), ?_⟩
|
||||
else
|
||||
c
|
||||
where finally
|
||||
have h₁ : 2^32 ≤ c.val.toNat + ('A'.val - 'a'.val).toNat :=
|
||||
@Nat.add_le_add 'a'.val.toNat _ (2^32 - 'a'.val.toNat) _ h.1 (by decide)
|
||||
have h₂ : c.val.toBitVec.toNat + ('A'.val - 'a'.val).toNat < 2^32 + 0xd800 :=
|
||||
Nat.add_lt_add_right (Nat.lt_of_le_of_lt h.2 (by decide)) _
|
||||
have add_eq {x y : UInt32} : (x + y).toNat = (x.toNat + y.toNat) % 2^32 := rfl
|
||||
replace h₂ := Nat.sub_lt_left_of_lt_add h₁ h₂
|
||||
exact .inl <| lt_of_eq_of_lt (add_eq.trans (Nat.mod_eq_sub_mod h₁) |>.trans
|
||||
(Nat.mod_eq_of_lt (Nat.lt_trans h₂ (by decide)))) h₂
|
||||
|
||||
end Char
|
||||
|
|
|
|||
|
|
@ -405,22 +405,6 @@ instance : Min UInt16 := minOfLe
|
|||
/-- Converts an `Int` to a `UInt32` by taking the (non-negative remainder of the division by `2 ^ 32`. -/
|
||||
def UInt32.ofInt (x : Int) : UInt32 := ofNat (x % 2 ^ 32).toNat
|
||||
|
||||
/--
|
||||
Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
|
||||
operator.
|
||||
|
||||
This function is overridden at runtime with an efficient implementation.
|
||||
-/
|
||||
@[extern "lean_uint32_add"]
|
||||
protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
|
||||
/--
|
||||
Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
|
||||
via the `-` operator.
|
||||
|
||||
This function is overridden at runtime with an efficient implementation.
|
||||
-/
|
||||
@[extern "lean_uint32_sub"]
|
||||
protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
|
||||
/--
|
||||
Multiplies two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `*`
|
||||
operator.
|
||||
|
|
@ -526,8 +510,6 @@ natural numbers. Usually accessed via the `≤` operator.
|
|||
-/
|
||||
@[expose] protected def UInt32.le (a b : UInt32) : Prop := a.toBitVec ≤ b.toBitVec
|
||||
|
||||
instance : Add UInt32 := ⟨UInt32.add⟩
|
||||
instance : Sub UInt32 := ⟨UInt32.sub⟩
|
||||
instance : Mul UInt32 := ⟨UInt32.mul⟩
|
||||
instance : Pow UInt32 Nat := ⟨UInt32.pow⟩
|
||||
instance : Mod UInt32 := ⟨UInt32.mod⟩
|
||||
|
|
|
|||
|
|
@ -207,6 +207,28 @@ theorem UInt32.lt_ofNatLT_of_lt {n m : Nat} (h1 : n < UInt32.size) (h2 : m < UIn
|
|||
simp only [(· < ·), BitVec.toNat, ofNatLT, BitVec.ofNatLT, ofNat, BitVec.ofNat, Fin.Internal.ofNat_eq_ofNat,
|
||||
Fin.ofNat, Nat.mod_eq_of_lt h2, imp_self]
|
||||
|
||||
|
||||
/--
|
||||
Adds two 32-bit unsigned integers, wrapping around on overflow. Usually accessed via the `+`
|
||||
operator.
|
||||
|
||||
This function is overridden at runtime with an efficient implementation.
|
||||
-/
|
||||
@[extern "lean_uint32_add"]
|
||||
protected def UInt32.add (a b : UInt32) : UInt32 := ⟨a.toBitVec + b.toBitVec⟩
|
||||
|
||||
/--
|
||||
Subtracts one 32-bit unsigned integer from another, wrapping around on underflow. Usually accessed
|
||||
via the `-` operator.
|
||||
|
||||
This function is overridden at runtime with an efficient implementation.
|
||||
-/
|
||||
@[extern "lean_uint32_sub"]
|
||||
protected def UInt32.sub (a b : UInt32) : UInt32 := ⟨a.toBitVec - b.toBitVec⟩
|
||||
|
||||
instance : Add UInt32 := ⟨UInt32.add⟩
|
||||
instance : Sub UInt32 := ⟨UInt32.sub⟩
|
||||
|
||||
/-- Converts a `UInt64` into the corresponding `Fin UInt64.size`. -/
|
||||
def UInt64.toFin (x : UInt64) : Fin UInt64.size := x.toBitVec.toFin
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ public import Init.Data.Option.Coe
|
|||
public import Init.Data.Range
|
||||
import Init.Data.SInt.Basic
|
||||
import Init.Data.String.Basic
|
||||
import Lean.Server.Completion.CompletionUtils
|
||||
|
||||
public section
|
||||
|
||||
|
|
@ -271,7 +272,7 @@ def fuzzyMatchScore? (pattern word : String) : Option Float := Id.run do
|
|||
return some 1
|
||||
if pattern.length > word.length then
|
||||
return none
|
||||
if !(containsInOrderLower pattern word) then
|
||||
if !(pattern.charactersIn word) then
|
||||
return none
|
||||
|
||||
let some score := fuzzyMatchCore pattern word (stringInfo pattern) (stringInfo word)
|
||||
|
|
|
|||
|
|
@ -11,8 +11,30 @@ public import Lean.Meta.WHNF
|
|||
public section
|
||||
|
||||
partial def String.charactersIn (a b : String) : Bool :=
|
||||
go ⟨0⟩ ⟨0⟩
|
||||
goFastScalar ⟨0⟩ ⟨0⟩
|
||||
where
|
||||
/-
|
||||
This function is the ASCII fast path for `go`
|
||||
-/
|
||||
goFastScalar (aPos bPos : String.Pos.Raw) : Bool :=
|
||||
if ha : ¬aPos < a.rawEndPos then
|
||||
true
|
||||
else if hb : ¬bPos < b.rawEndPos then
|
||||
false
|
||||
else
|
||||
let aByte := a.getUTF8Byte aPos (by simpa using ha)
|
||||
let bByte := b.getUTF8Byte bPos (by simpa using hb)
|
||||
-- If a or b are not UTF-8 bytes we give up on the fast path
|
||||
if (aByte &&& 0x80 != 0) || (bByte &&& 0x80 != 0) then
|
||||
go aPos bPos
|
||||
else
|
||||
let bPos := ⟨bPos.byteIdx + 1⟩
|
||||
if aByte.toAsciiLower == bByte.toAsciiLower then
|
||||
let aPos := ⟨aPos.byteIdx + 1⟩
|
||||
goFastScalar aPos bPos
|
||||
else
|
||||
goFastScalar aPos bPos
|
||||
|
||||
go (aPos bPos : String.Pos.Raw) : Bool :=
|
||||
if ha : aPos.atEnd a then
|
||||
true
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue