fix: ilean loading performance (#4900)

This PR roughly halves the time needed to load the .ilean files by
optimizing the JSON parser and the conversion from JSON to Lean data
structures.

The code is optimized roughly as follows:
- String operations are inlined more aggressively
- Parsers are changed to use new `String.Iterator` functions `curr'` and
`next'` that receive a proof and hence do not need to perform an
additional check
- The `RefIdent` of .ilean files now uses a `String` instead of a `Name`
to avoid the expensive parse step from `String` to `Name` (despite the
fact that we only very rarely actually need a `Name` in downstream code)
- Instead of `List`s and `Subarray`s, the JSON to Lean conversion now
directly passes around arrays and array indices to avoid redundant
boxing
- Parsec's `peek?` sometimes generates redundant `Option` wrappers
because the generation of basic blocks interferes with the ctor-match
optimization, so it is changed to use an `isEof` check where possible
- Early returns and inline-do-blocks cause the code generator to
generate new functions, which then interfere with optimizations, so they
are now avoided
- Mutual defs are used instead of unspecialized passing of higher-order
functions to generate faster code
- The object parser is made tail-recursive

This PR also fixes a stack overflow in `Lean.Json.compress` that would
occur with long lists and adds a benchmark for the .ilean roundtrip
(compressed pretty-printing -> parsing).
This commit is contained in:
Marc Huisinga 2024-08-29 13:51:48 +02:00 committed by GitHub
parent 5c61ad38be
commit 9009c1ac91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 423 additions and 264 deletions

View file

@ -275,6 +275,22 @@ def atEnd : Iterator → Bool
def hasNext : Iterator → Bool
| ⟨arr, i⟩ => i < arr.size
/-- The byte at the current position. --/
@[inline]
def curr' (it : Iterator) (h : it.hasNext) : UInt8 :=
match it with
| ⟨arr, i⟩ =>
have : i < arr.size := by
simp only [hasNext, decide_eq_true_eq] at h
assumption
arr[i]
/-- Moves the iterator's position forward by one byte. --/
@[inline]
def next' (it : Iterator) (_h : it.hasNext) : Iterator :=
match it with
| ⟨arr, i⟩ => ⟨arr, i + 1⟩
/-- True if the position is not zero. -/
@[inline]
def hasPrev : Iterator → Bool

View file

@ -63,27 +63,27 @@ instance : Inhabited Char where
default := 'A'
/-- Is the character a space (U+0020) a tab (U+0009), a carriage return (U+000D) or a newline (U+000A)? -/
def isWhitespace (c : Char) : Bool :=
@[inline] def isWhitespace (c : Char) : Bool :=
c = ' ' || c = '\t' || c = '\r' || c = '\n'
/-- Is the character in `ABCDEFGHIJKLMNOPQRSTUVWXYZ`? -/
def isUpper (c : Char) : Bool :=
@[inline] def isUpper (c : Char) : Bool :=
c.val ≥ 65 && c.val ≤ 90
/-- Is the character in `abcdefghijklmnopqrstuvwxyz`? -/
def isLower (c : Char) : Bool :=
@[inline] def isLower (c : Char) : Bool :=
c.val ≥ 97 && c.val ≤ 122
/-- Is the character in `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`? -/
def isAlpha (c : Char) : Bool :=
@[inline] def isAlpha (c : Char) : Bool :=
c.isUpper || c.isLower
/-- Is the character in `0123456789`? -/
def isDigit (c : Char) : Bool :=
@[inline] def isDigit (c : Char) : Bool :=
c.val ≥ 48 && c.val ≤ 57
/-- Is the character in `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`? -/
def isAlphanum (c : Char) : Bool :=
@[inline] def isAlphanum (c : Char) : Bool :=
c.isAlpha || c.isDigit
/-- Convert an upper case character to its lower case character.

View file

@ -227,7 +227,7 @@ Examples:
* `"abc".front = 'a'`
* `"".front = (default : Char)`
-/
def front (s : String) : Char :=
@[inline] def front (s : String) : Char :=
get s 0
/--
@ -237,7 +237,7 @@ Examples:
* `"abc".back = 'c'`
* `"".back = (default : Char)`
-/
def back (s : String) : Char :=
@[inline] def back (s : String) : Char :=
get s (prev s s.endPos)
/--
@ -374,7 +374,7 @@ Examples:
* `"abba".posOf 'z' = none`
* `"L∃∀N".posOf '∀' = some ⟨4⟩`
-/
def revPosOf (s : String) (c : Char) : Option Pos :=
@[inline] def revPosOf (s : String) (c : Char) : Option Pos :=
revPosOfAux s c s.endPos
def findAux (s : String) (p : Char → Bool) (stopPos : Pos) (pos : Pos) : Pos :=
@ -398,7 +398,7 @@ def revFindAux (s : String) (p : Char → Bool) (pos : Pos) : Option Pos :=
else revFindAux s p pos
termination_by pos.1
def revFind (s : String) (p : Char → Bool) : Option Pos :=
@[inline] def revFind (s : String) (p : Char → Bool) : Option Pos :=
revFindAux s p s.endPos
abbrev Pos.min (p₁ p₂ : Pos) : Pos :=
@ -505,7 +505,7 @@ The default separator is `" "`. The separators are not included in the returned
"ababacabac".splitOn "aba" = ["", "bac", "c"]
```
-/
def splitOn (s : String) (sep : String := " ") : List String :=
@[inline] def splitOn (s : String) (sep : String := " ") : List String :=
if sep == "" then [s] else splitOnAux s sep 0 0 0 []
instance : Inhabited String := ⟨""⟩
@ -515,16 +515,16 @@ instance : Append String := ⟨String.append⟩
@[deprecated push (since := "2024-04-06")]
def str : String → Char → String := push
def pushn (s : String) (c : Char) (n : Nat) : String :=
@[inline] def pushn (s : String) (c : Char) (n : Nat) : String :=
n.repeat (fun s => s.push c) s
def isEmpty (s : String) : Bool :=
@[inline] def isEmpty (s : String) : Bool :=
s.endPos == 0
def join (l : List String) : String :=
@[inline] def join (l : List String) : String :=
l.foldl (fun r s => r ++ s) ""
def singleton (c : Char) : String :=
@[inline] def singleton (c : Char) : String :=
"".push c
def intercalate (s : String) : List String → String
@ -561,7 +561,7 @@ structure Iterator where
deriving DecidableEq, Inhabited
/-- Creates an iterator at the beginning of a string. -/
def mkIterator (s : String) : Iterator :=
@[inline] def mkIterator (s : String) : Iterator :=
⟨s, 0⟩
@[inherit_doc mkIterator]
@ -575,66 +575,74 @@ theorem Iterator.sizeOf_eq (i : String.Iterator) : sizeOf i = i.1.utf8ByteSize -
rfl
namespace Iterator
@[inherit_doc Iterator.s]
@[inline, inherit_doc Iterator.s]
def toString := Iterator.s
/-- Number of bytes remaining in the iterator. -/
def remainingBytes : Iterator → Nat
@[inline] def remainingBytes : Iterator → Nat
| ⟨s, i⟩ => s.endPos.byteIdx - i.byteIdx
@[inherit_doc Iterator.i]
@[inline, inherit_doc Iterator.i]
def pos := Iterator.i
/-- The character at the current position.
On an invalid position, returns `(default : Char)`. -/
def curr : Iterator → Char
@[inline] def curr : Iterator → Char
| ⟨s, i⟩ => get s i
/-- Moves the iterator's position forward by one character, unconditionally.
It is only valid to call this function if the iterator is not at the end of the string, *i.e.*
`Iterator.atEnd` is `false`; otherwise, the resulting iterator will be invalid. -/
def next : Iterator → Iterator
@[inline] def next : Iterator → Iterator
| ⟨s, i⟩ => ⟨s, s.next i⟩
/-- Decreases the iterator's position.
If the position is zero, this function is the identity. -/
def prev : Iterator → Iterator
@[inline] def prev : Iterator → Iterator
| ⟨s, i⟩ => ⟨s, s.prev i⟩
/-- True if the iterator is past the string's last character. -/
def atEnd : Iterator → Bool
@[inline] def atEnd : Iterator → Bool
| ⟨s, i⟩ => i.byteIdx ≥ s.endPos.byteIdx
/-- True if the iterator is not past the string's last character. -/
def hasNext : Iterator → Bool
@[inline] def hasNext : Iterator → Bool
| ⟨s, i⟩ => i.byteIdx < s.endPos.byteIdx
/-- True if the position is not zero. -/
def hasPrev : Iterator → Bool
@[inline] def hasPrev : Iterator → Bool
| ⟨_, i⟩ => i.byteIdx > 0
@[inline] def curr' (it : Iterator) (h : it.hasNext) : Char :=
match it with
| ⟨s, i⟩ => get' s i (by simpa only [hasNext, endPos, decide_eq_true_eq, String.atEnd, ge_iff_le, Nat.not_le] using h)
@[inline] def next' (it : Iterator) (h : it.hasNext) : Iterator :=
match it with
| ⟨s, i⟩ => ⟨s, s.next' i (by simpa only [hasNext, endPos, decide_eq_true_eq, String.atEnd, ge_iff_le, Nat.not_le] using h)⟩
/-- Replaces the current character in the string.
Does nothing if the iterator is at the end of the string. If the iterator contains the only
reference to its string, this function will mutate the string in-place instead of allocating a new
one. -/
def setCurr : Iterator → Char → Iterator
@[inline] def setCurr : Iterator → Char → Iterator
| ⟨s, i⟩, c => ⟨s.set i c, i⟩
/-- Moves the iterator's position to the end of the string.
Note that `i.toEnd.atEnd` is always `true`. -/
def toEnd : Iterator → Iterator
@[inline] def toEnd : Iterator → Iterator
| ⟨s, _⟩ => ⟨s, s.endPos⟩
/-- Extracts the substring between the positions of two iterators.
Returns the empty string if the iterators are for different strings, or if the position of the first
iterator is past the position of the second iterator. -/
def extract : Iterator → Iterator → String
@[inline] def extract : Iterator → Iterator → String
| ⟨s₁, b⟩, ⟨s₂, e⟩ =>
if s₁ ≠ s₂ || b > e then ""
else s₁.extract b e
@ -648,7 +656,7 @@ def forward : Iterator → Nat → Iterator
| it, n+1 => forward it.next n
/-- The remaining characters in an iterator, as a string. -/
def remainingToString : Iterator → String
@[inline] def remainingToString : Iterator → String
| ⟨s, i⟩ => s.extract i s.endPos
@[inherit_doc forward]
@ -673,7 +681,7 @@ def offsetOfPosAux (s : String) (pos : Pos) (i : Pos) (offset : Nat) : Nat :=
offsetOfPosAux s pos (s.next i) (offset+1)
termination_by s.endPos.1 - i.1
def offsetOfPos (s : String) (pos : Pos) : Nat :=
@[inline] def offsetOfPos (s : String) (pos : Pos) : Nat :=
offsetOfPosAux s pos 0 0
@[specialize] def foldlAux {α : Type u} (f : α → Char → α) (s : String) (stopPos : Pos) (i : Pos) (a : α) : α :=
@ -714,7 +722,7 @@ termination_by stopPos.1 - i.1
@[inline] def all (s : String) (p : Char → Bool) : Bool :=
!s.any (fun c => !p c)
def contains (s : String) (c : Char) : Bool :=
@[inline] def contains (s : String) (c : Char) : Bool :=
s.any (fun a => a == c)
theorem utf8SetAux_of_gt (c' : Char) : ∀ (cs : List Char) {i p : Pos}, i > p → utf8SetAux c' cs i p = cs
@ -770,7 +778,7 @@ termination_by s.endPos.1 - i.1
@[inline] def map (f : Char → Char) (s : String) : String :=
mapAux f 0 s
def isNat (s : String) : Bool :=
@[inline] def isNat (s : String) : Bool :=
!s.isEmpty && s.all (·.isDigit)
def toNat? (s : String) : Option Nat :=
@ -940,7 +948,7 @@ def splitOn (s : Substring) (sep : String := " ") : List Substring :=
@[inline] def all (s : Substring) (p : Char → Bool) : Bool :=
!s.any (fun c => !p c)
def contains (s : Substring) (c : Char) : Bool :=
@[inline] def contains (s : Substring) (c : Char) : Bool :=
s.any (fun a => a == c)
@[specialize] def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos :=
@ -995,7 +1003,7 @@ termination_by i.1
let e := takeRightWhileAux s b Char.isWhitespace e
⟨s, b, e⟩
def isNat (s : Substring) : Bool :=
@[inline] def isNat (s : Substring) : Bool :=
s.all fun c => c.isDigit
def toNat? (s : Substring) : Option Nat :=
@ -1017,43 +1025,43 @@ end Substring
namespace String
def drop (s : String) (n : Nat) : String :=
@[inline] def drop (s : String) (n : Nat) : String :=
(s.toSubstring.drop n).toString
def dropRight (s : String) (n : Nat) : String :=
@[inline] def dropRight (s : String) (n : Nat) : String :=
(s.toSubstring.dropRight n).toString
def take (s : String) (n : Nat) : String :=
@[inline] def take (s : String) (n : Nat) : String :=
(s.toSubstring.take n).toString
def takeRight (s : String) (n : Nat) : String :=
@[inline] def takeRight (s : String) (n : Nat) : String :=
(s.toSubstring.takeRight n).toString
def takeWhile (s : String) (p : Char → Bool) : String :=
@[inline] def takeWhile (s : String) (p : Char → Bool) : String :=
(s.toSubstring.takeWhile p).toString
def dropWhile (s : String) (p : Char → Bool) : String :=
@[inline] def dropWhile (s : String) (p : Char → Bool) : String :=
(s.toSubstring.dropWhile p).toString
def takeRightWhile (s : String) (p : Char → Bool) : String :=
@[inline] def takeRightWhile (s : String) (p : Char → Bool) : String :=
(s.toSubstring.takeRightWhile p).toString
def dropRightWhile (s : String) (p : Char → Bool) : String :=
@[inline] def dropRightWhile (s : String) (p : Char → Bool) : String :=
(s.toSubstring.dropRightWhile p).toString
def startsWith (s pre : String) : Bool :=
@[inline] def startsWith (s pre : String) : Bool :=
s.toSubstring.take pre.length == pre.toSubstring
def endsWith (s post : String) : Bool :=
@[inline] def endsWith (s post : String) : Bool :=
s.toSubstring.takeRight post.length == post.toSubstring
def trimRight (s : String) : String :=
@[inline] def trimRight (s : String) : String :=
s.toSubstring.trimRight.toString
def trimLeft (s : String) : String :=
@[inline] def trimLeft (s : String) : String :=
s.toSubstring.trimLeft.toString
def trim (s : String) : String :=
@[inline] def trim (s : String) : String :=
s.toSubstring.trim.toString
@[inline] def nextWhile (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos :=
@ -1062,23 +1070,23 @@ def trim (s : String) : String :=
@[inline] def nextUntil (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos :=
nextWhile s (fun c => !p c) i
def toUpper (s : String) : String :=
@[inline] def toUpper (s : String) : String :=
s.map Char.toUpper
def toLower (s : String) : String :=
@[inline] def toLower (s : String) : String :=
s.map Char.toLower
def capitalize (s : String) :=
@[inline] def capitalize (s : String) :=
s.set 0 <| s.get 0 |>.toUpper
def decapitalize (s : String) :=
@[inline] def decapitalize (s : String) :=
s.set 0 <| s.get 0 |>.toLower
end String
namespace Char
protected def toString (c : Char) : String :=
@[inline] protected def toString (c : Char) : String :=
String.singleton c
@[simp] theorem length_toString (c : Char) : c.toString.length = 1 := rfl

View file

@ -75,7 +75,7 @@ See #2572.
opaque Internal.hasLLVMBackend (u : Unit) : Bool
/-- Valid identifier names -/
def isGreek (c : Char) : Bool :=
@[inline] def isGreek (c : Char) : Bool :=
0x391 ≤ c.val && c.val ≤ 0x3dd
def isLetterLike (c : Char) : Bool :=
@ -86,7 +86,7 @@ def isLetterLike (c : Char) : Bool :=
(0x2100 ≤ c.val && c.val ≤ 0x214f) || -- Letter like block
(0x1d49c ≤ c.val && c.val ≤ 0x1d59f) -- Latin letters, Script, Double-struck, Fractur
def isNumericSubscript (c : Char) : Bool :=
@[inline] def isNumericSubscript (c : Char) : Bool :=
0x2080 ≤ c.val && c.val ≤ 0x2089
def isSubScriptAlnum (c : Char) : Bool :=
@ -94,16 +94,16 @@ def isSubScriptAlnum (c : Char) : Bool :=
(0x2090 ≤ c.val && c.val ≤ 0x209c) ||
(0x1d62 ≤ c.val && c.val ≤ 0x1d6a)
def isIdFirst (c : Char) : Bool :=
@[inline] def isIdFirst (c : Char) : Bool :=
c.isAlpha || c = '_' || isLetterLike c
def isIdRest (c : Char) : Bool :=
@[inline] def isIdRest (c : Char) : Bool :=
c.isAlphanum || c = '_' || c = '\'' || c == '!' || c == '?' || isLetterLike c || isSubScriptAlnum c
def idBeginEscape := '«'
def idEndEscape := '»'
def isIdBeginEscape (c : Char) : Bool := c = idBeginEscape
def isIdEndEscape (c : Char) : Bool := c = idEndEscape
@[inline] def isIdBeginEscape (c : Char) : Bool := c = idBeginEscape
@[inline] def isIdEndEscape (c : Char) : Bool := c = idEndEscape
namespace Name
def getRoot : Name → Name

View file

@ -9,20 +9,19 @@ import Lean.Data.Json.Basic
import Lean.Data.RBMap
import Std.Internal.Parsec
namespace Lean.Json.Parser
open Std.Internal.Parsec
open Std.Internal.Parsec.String
@[inline]
namespace Lean.Json.Parser
def hexChar : Parser Nat := do
let c ← any
if '0' ≤ c ∧ c ≤ '9' then
pure $ c.val.toNat - '0'.val.toNat
else if 'a' ≤ c ∧ c ≤ 'f' then
pure $ c.val.toNat - 'a'.val.toNat + 10
else if 'A' ≤ c ∧ c ≤ 'F' then
pure $ c.val.toNat - 'A'.val.toNat + 10
if '0' <= c && c <= '9' then
pure $ (c.val - '0'.val).toNat
else if 'a' <= c && c <= 'f' then
pure $ (c.val - 'a'.val + 10).toNat
else if 'A' <= c && c <= 'F' then
pure $ (c.val - 'A'.val + 10).toNat
else
fail "invalid hex character"
@ -44,31 +43,46 @@ def escapedChar : Parser Char := do
partial def strCore (acc : String) : Parser String := do
let c ← peek!
if c = '"' then -- "
if c == '"' then
skip
return acc
else
let c ← any
if c = '\\' then
if c == '\\' then
strCore (acc.push (← escapedChar))
-- as to whether c.val > 0xffff should be split up and encoded with multiple \u,
-- the JSON standard is not definite: both directly printing the character
-- and encoding it with multiple \u is allowed. we choose the former.
else if 0x0020 ≤ c.val ∧ c.val ≤ 0x10ffff then
else if 0x0020 <= c.val && c.val <= 0x10ffff then
strCore (acc.push c)
else
fail "unexpected character in string"
def str : Parser String := strCore ""
@[inline] def str : Parser String := strCore ""
partial def natCore (acc digits : Nat) : Parser (Nat × Nat) := do
let some c ← peek? | return (acc, digits)
if '0' ≤ c ∧ c ≤ '9' then
skip
let acc' := 10*acc + (c.val.toNat - '0'.val.toNat)
natCore acc' (digits+1)
partial def natCore (acc : Nat) : Parser Nat := do
if ← isEof then
return acc
else
let c ← peek!
if '0' <= c && c <= '9' then
skip
let acc' := 10*acc + (c.val - '0'.val).toNat
natCore acc'
else
return acc
partial def natCoreNumDigits (acc digits : Nat) : Parser (Nat × Nat) := do
if ← isEof then
return (acc, digits)
else
let c ← peek!
if '0' <= c && c <= '9' then
skip
let acc' := 10*acc + (c.val - '0'.val).toNat
natCoreNumDigits acc' (digits+1)
else
return (acc, digits)
@[inline]
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parser Unit := do
@ -80,129 +94,152 @@ def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parser Uni
@[inline]
def natNonZero : Parser Nat := do
lookahead (fun c => '1' ≤ c ∧ c ≤ '9') "1-9"
let (n, _) ← natCore 0 0
return n
lookahead (fun c => '1' <= c && c <= '9') "1-9"
natCore 0
@[inline]
def natNumDigits : Parser (Nat × Nat) := do
lookahead (fun c => '0' ≤ c ∧ c ≤ '9') "digit"
natCore 0 0
lookahead (fun c => '0' <= c && c <= '9') "digit"
natCoreNumDigits 0 0
@[inline]
def natMaybeZero : Parser Nat := do
let (n, _) ← natNumDigits
return n
lookahead (fun c => '0' <= c && c <= '9') "0-9"
natCore 0
def num : Parser JsonNumber := do
@[inline]
def numSign : Parser Int := do
let c ← peek!
let sign ← if c = '-' then
let sign ← if c == '-' then
skip
pure (-1 : Int)
return (-1 : Int)
else
pure 1
return 1
@[inline]
def nat : Parser Nat := do
let c ← peek!
let res ← if c = '0' then
if c == '0' then
skip
pure 0
return 0
else
natNonZero
let c? ← peek?
let res : JsonNumber ← if c? = some '.' then
skip
let (n, d) ← natNumDigits
if d > USize.size then fail "too many decimals"
let mantissa' := sign * (res * (10^d : Nat) + n)
let exponent' := d
pure <| JsonNumber.mk mantissa' exponent'
@[inline]
def numWithDecimals : Parser JsonNumber := do
let sign ← numSign
let whole ← nat
if ← isEof then
pure <| JsonNumber.fromInt (sign * whole)
else
pure <| JsonNumber.fromInt (sign * res)
let c? ← peek?
if c? = some 'e' c? = some 'E' then
skip
let c ← peek!
if c = '-' then
if c == '.' then
skip
let n ← natMaybeZero
return res.shiftr n
let (n, d) ← natNumDigits
if d > USize.size then fail "too many decimals"
let mantissa' := sign * (whole * (10^d : Nat) + n)
let exponent' := d
pure <| JsonNumber.mk mantissa' exponent'
else
if c = '+' then skip
let n ← natMaybeZero
if n > USize.size then fail "exp too large"
return res.shiftl n
else
return res
pure <| JsonNumber.fromInt (sign * whole)
partial def arrayCore (anyCore : Parser Json) (acc : Array Json) : Parser (Array Json) := do
let hd ← anyCore
let acc' := acc.push hd
let c ← any
if c = ']' then
ws
return acc'
else if c = ',' then
ws
arrayCore anyCore acc'
@[inline]
def exponent (value : JsonNumber) : Parser JsonNumber := do
if ← isEof then
return value
else
fail "unexpected character in array"
partial def objectCore (anyCore : Parser Json) : Parser (RBNode String (fun _ => Json)) := do
lookahead (fun c => c = '"') "\""; skip; -- "
let k ← strCore ""; ws
lookahead (fun c => c = ':') ":"; skip; ws
let v ← anyCore
let c ← any
if c = '}' then
ws
return RBNode.singleton k v
else if c = ',' then
ws
let kvs ← objectCore anyCore
return kvs.insert compare k v
else
fail "unexpected character in object"
partial def anyCore : Parser Json := do
let c ← peek!
if c = '[' then
skip; ws
let c ← peek!
if c = ']' then
skip; ws
return Json.arr (Array.mkEmpty 0)
if c == 'e' || c == 'E' then
skip
let c ← peek!
if c == '-' then
skip
let n ← natMaybeZero
return value.shiftr n
else
if c = '+' then skip
let n ← natMaybeZero
if n > USize.size then fail "exp too large"
return value.shiftl n
else
let a ← arrayCore anyCore (Array.mkEmpty 4)
return Json.arr a
else if c = '{' then
skip; ws
let c ← peek!
if c = '}' then
skip; ws
return Json.obj (RBNode.leaf)
else
let kvs ← objectCore anyCore
return Json.obj kvs
else if c = '\"' then
skip
let s ← strCore ""
ws
return Json.str s
else if c = 'f' then
skipString "false"; ws
return Json.bool false
else if c = 't' then
skipString "true"; ws
return Json.bool true
else if c = 'n' then
skipString "null"; ws
return Json.null
else if c = '-' ('0' ≤ c ∧ c ≤ '9') then
let n ← num
ws
return Json.num n
else
fail "unexpected input"
return value
def num : Parser JsonNumber := do
let res : JsonNumber ← numWithDecimals
exponent res
mutual
partial def arrayCore (acc : Array Json) : Parser (Array Json) := do
let hd ← anyCore
let acc' := acc.push hd
let c ← any
if c == ']' then
ws
return acc'
else if c == ',' then
ws
arrayCore acc'
else
fail "unexpected character in array"
partial def objectCore (kvs : RBNode String (fun _ => Json)) : Parser (RBNode String (fun _ => Json)) := do
lookahead (fun c => c == '"') "\""; skip;
let k ← str; ws
lookahead (fun c => c == ':') ":"; skip; ws
let v ← anyCore
let c ← any
if c == '}' then
ws
return kvs.insert compare k v
else if c == ',' then
ws
objectCore (kvs.insert compare k v)
else
fail "unexpected character in object"
partial def anyCore : Parser Json := do
let c ← peek!
if c == '[' then
skip; ws
let c ← peek!
if c == ']' then
skip; ws
return Json.arr (Array.mkEmpty 0)
else
let a ← arrayCore (Array.mkEmpty 4)
return Json.arr a
else if c == '{' then
skip; ws
let c ← peek!
if c == '}' then
skip; ws
return Json.obj (RBNode.leaf)
else
let kvs ← objectCore RBNode.leaf
return Json.obj kvs
else if c == '\"' then
skip
let s ← str
ws
return Json.str s
else if c == 'f' then
skipString "false"; ws
return Json.bool false
else if c == 't' then
skipString "true"; ws
return Json.bool true
else if c == 'n' then
skipString "null"; ws
return Json.null
else if c == '-' || ('0' <= c && c <= '9') then
let n ← num
ws
return Json.num n
else
fail "unexpected input"
end
def any : Parser Json := do
ws
@ -215,9 +252,7 @@ end Json.Parser
namespace Json
def parse (s : String) : Except String Lean.Json :=
match Json.Parser.any s.mkIterator with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}"
Parser.run Json.Parser.any s
end Json

View file

@ -6,6 +6,7 @@ Authors: Gabriel Ebner, Marc Huisinga, Wojciech Nawrocki
prelude
import Lean.Data.Format
import Lean.Data.Json.Basic
import Init.Data.List.Impl
namespace Lean
namespace Json

View file

@ -22,27 +22,23 @@ reduce the size of the resulting JSON. -/
/--
Identifier of a reference.
-/
-- Names are represented by strings to avoid having to parse them to `Name`,
-- which is relatively expensive. Most uses of these names only need equality, anyways.
inductive RefIdent where
/-- Named identifier. These are used in all references that are globally available. -/
| const (moduleName : Name) (identName : Name) : RefIdent
| const (moduleName : String) (identName : String) : RefIdent
/-- Unnamed identifier. These are used for all local references. -/
| fvar (moduleName : Name) (id : FVarId) : RefIdent
| fvar (moduleName : String) (id : String) : RefIdent
deriving BEq, Hashable, Inhabited
namespace RefIdent
instance : ToJson FVarId where
toJson id := toJson id.name
instance : FromJson FVarId where
fromJson? s := return ⟨← fromJson? s⟩
/-- Shortened representation of `RefIdent` for more compact serialization. -/
inductive RefIdentJsonRepr
/-- Shortened representation of `RefIdent.const` for more compact serialization. -/
| c (m n : Name)
| c (m n : String)
/-- Shortened representation of `RefIdent.fvar` for more compact serialization. -/
| f (m : Name) (i : FVarId)
| f (m : String) (i : String)
deriving FromJson, ToJson
/-- Converts `id` to its compact serialization representation. -/
@ -74,7 +70,7 @@ end RefIdent
/-- Information about the declaration surrounding a reference. -/
structure RefInfo.ParentDecl where
/-- Name of the declaration surrounding a reference. -/
name : Name
name : String
/-- Range of the declaration surrounding a reference. -/
range : Lsp.Range
/-- Selection range of the declaration surrounding a reference. -/
@ -104,7 +100,7 @@ instance : ToJson RefInfo where
let rangeToList (r : Lsp.Range) : List Nat :=
[r.start.line, r.start.character, r.end.line, r.end.character]
let parentDeclToList (d : RefInfo.ParentDecl) : List Json :=
let name := d.name.toString |> toJson
let name := d.name |> toJson
let range := rangeToList d.range |>.map toJson
let selectionRange := rangeToList d.selectionRange |>.map toJson
[name] ++ range ++ selectionRange
@ -118,34 +114,42 @@ instance : ToJson RefInfo where
]
instance : FromJson RefInfo where
-- This implementation is optimized to prevent redundant intermediate allocations.
fromJson? j := do
let toRange : List Nat → Except String Lsp.Range
| [sLine, sChar, eLine, eChar] => pure ⟨⟨sLine, sChar⟩, ⟨eLine, eChar⟩⟩
| l => throw s!"Expected list of length 4, not {l.length}"
let toParentDecl (a : Array Json) : Except String RefInfo.ParentDecl := do
let name := String.toName <| ← fromJson? a[0]!
let range ← a[1:5].toArray.toList |>.mapM fromJson?
let range ← toRange range
let selectionRange ← a[5:].toArray.toList |>.mapM fromJson?
let selectionRange ← toRange selectionRange
let toRange (a : Array Json) (i : Nat) : Except String Lsp.Range :=
if h : a.size < i + 4 then
throw s!"Expected list of length 4, not {a.size}"
else
return {
start := {
line := ← fromJson? a[i]
character := ← fromJson? a[i+1]
}
«end» := {
line := ← fromJson? a[i+2]
character := ← fromJson? a[i+3]
}
}
let toParentDecl (a : Array Json) (i : Nat) : Except String RefInfo.ParentDecl := do
let name ← fromJson? a[i]!
let range ← toRange a (i + 1)
let selectionRange ← toRange a (i + 5)
return ⟨name, range, selectionRange⟩
let toLocation (l : List Json) : Except String RefInfo.Location := do
let l := l.toArray
if l.size != 4 && l.size != 13 then
let toLocation (a : Array Json) : Except String RefInfo.Location := do
if a.size != 4 && a.size != 13 then
.error "Expected list of length 4 or 13, not {l.size}"
let range ← l[:4].toArray.toList |>.mapM fromJson?
let range ← toRange range
if l.size == 13 then
let parentDecl ← toParentDecl l[4:].toArray
let range ← toRange a 0
if a.size == 13 then
let parentDecl ← toParentDecl a 4
return ⟨range, parentDecl⟩
else
return ⟨range, none⟩
let definition? ← j.getObjValAs? (Option $ List Json) "definition"
let definition? ← j.getObjValAs? (Option $ Array Json) "definition"
let definition? ← match definition? with
| none => pure none
| some list => some <$> toLocation list
let usages ← j.getObjValAs? (Array $ List Json) "usages"
| some array => some <$> toLocation array
let usages ← j.getObjValAs? (Array $ Array Json) "usages"
let usages ← usages.mapM toLocation
pure { definition?, usages }

View file

@ -13,11 +13,11 @@ open Lean
namespace Lean
namespace Xml
namespace Parser
open Std.Internal.Parsec
open Std.Internal.Parsec.String
namespace Parser
abbrev LeanChar := Char
/-- consume a newline character sequence pretending, that we read '\n'. As per spec:
@ -482,8 +482,6 @@ def document : Parser Element := prolog *> element <* many Misc <* eof
end Parser
def parse (s : String) : Except String Element :=
match Xml.Parser.document s.mkIterator with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {it.i.byteIdx.repr}: {err}\n{(it.prevn 10).extract it}"
Parser.run Xml.Parser.document s
end Xml

View file

@ -81,7 +81,7 @@ def toLspRefInfo (i : RefInfo) : BaseIO Lsp.RefInfo := do
let parentDeclName ← parentDeclName?
let parentDeclRange := (← parentDeclRanges?).range.toLspRange
let parentDeclSelectionRange := (← parentDeclRanges?).selectionRange.toLspRange
return ⟨parentDeclName, parentDeclRange, parentDeclSelectionRange⟩
return ⟨parentDeclName.toString, parentDeclRange, parentDeclSelectionRange⟩
}
let definition? ← i.definition.mapM refToRefInfoLocation
let usages ← i.usages.mapM refToRefInfoLocation
@ -219,14 +219,14 @@ def identOf (ci : ContextInfo) (i : Info) : Option (RefIdent × Bool) := do
match i with
| Info.ofTermInfo ti => match ti.expr with
| Expr.const n .. =>
some (RefIdent.const (← getModuleContainingDecl? ci.env n) n, ti.isBinder)
some (RefIdent.const (← getModuleContainingDecl? ci.env n).toString n.toString, ti.isBinder)
| Expr.fvar id =>
some (RefIdent.fvar ci.env.header.mainModule id, ti.isBinder)
some (RefIdent.fvar ci.env.header.mainModule.toString id.name.toString, ti.isBinder)
| _ => none
| Info.ofFieldInfo fi =>
some (RefIdent.const (← getModuleContainingDecl? ci.env fi.projName) fi.projName, false)
some (RefIdent.const (← getModuleContainingDecl? ci.env fi.projName).toString fi.projName.toString, false)
| Info.ofOptionInfo oi =>
some (RefIdent.const (← getModuleContainingDecl? ci.env oi.declName) oi.declName, false)
some (RefIdent.const (← getModuleContainingDecl? ci.env oi.declName).toString oi.declName.toString, false)
| _ => none
/-- Finds all references in `trees`. -/
@ -328,7 +328,7 @@ where
if let .ofFVarAliasInfo ai := info then
-- FVars can only be aliases of FVars of the same file / module
let mod := ci.env.header.mainModule
insertIdMap (.fvar mod ai.id) (.fvar mod ai.baseId)))
insertIdMap (.fvar mod.toString ai.id.name.toString) (.fvar mod.toString ai.baseId.name.toString)))
get
@ -445,9 +445,10 @@ def allRefsFor
let refsToCheck := match ident with
| RefIdent.const .. => self.allRefs.toArray
| RefIdent.fvar identModule .. =>
match self.allRefs[identModule]? with
let identModuleName := identModule.toName
match self.allRefs[identModuleName]? with
| none => #[]
| some refs => #[(identModule, refs)]
| some refs => #[(identModuleName, refs)]
let mut result := #[]
for (module, refs) in refsToCheck do
let some info := refs.get? ident
@ -518,9 +519,9 @@ def definitionsMatching
| continue
let uri := System.Uri.pathToUri <| ← IO.FS.realPath path
for (ident, info) in refs.toList do
let (RefIdent.const _ name, some ⟨definitionRange, _⟩) := (ident, info.definition?)
let (RefIdent.const _ nameString, some ⟨definitionRange, _⟩) := (ident, info.definition?)
| continue
let some a := filter name
let some a := filter nameString.toName
| continue
result := result.push (a, ⟨uri, definitionRange⟩)
if let some maxAmount := maxAmount? then

View file

@ -530,7 +530,8 @@ private def callHierarchyItemOf?
| return none
match ident with
| .const definitionModule definitionName =>
| .const definitionModule definitionNameString =>
let definitionName := definitionNameString.toName
-- If we have a constant with a proper name, use it.
-- If `callHierarchyItemOf?` is used either on the name of a definition itself or e.g. an
-- `inductive` constructor, this is the right thing to do and using the parent decl is
@ -545,14 +546,15 @@ private def callHierarchyItemOf?
range := definitionLocation.range,
selectionRange := definitionLocation.range
data? := toJson {
module := definitionModule
module := definitionModule.toName
name := definitionName
: CallHierarchyItemData
}
}
| _ =>
let some ⟨parentDeclName, parentDeclRange, parentDeclSelectionRange⟩ := parentDecl?
let some ⟨parentDeclNameString, parentDeclRange, parentDeclSelectionRange⟩ := parentDecl?
| return none
let parentDeclName := parentDeclNameString.toName
let some definitionModule ← srcSearchPath.searchModuleNameOfUri definitionLocation.uri
| return none
@ -596,13 +598,15 @@ def handleCallHierarchyIncomingCalls (p : CallHierarchyIncomingCallsParams)
let srcSearchPath := (← read).srcSearchPath
let references ← (← read).references.get
let identRefs ← references.referringTo srcSearchPath (.const itemData.module itemData.name) false
let identRefs ← references.referringTo srcSearchPath (.const itemData.module.toString itemData.name.toString) false
let incomingCalls ← identRefs.filterMapM fun ⟨location, parentDecl?⟩ => do
let some ⟨parentDeclName, parentDeclRange, parentDeclSelectionRange⟩ := parentDecl?
let some ⟨parentDeclNameString, parentDeclRange, parentDeclSelectionRange⟩ := parentDecl?
| return none
let parentDeclName := parentDeclNameString.toName
let some refModule ← srcSearchPath.searchModuleNameOfUri location.uri
| return none
@ -657,7 +661,7 @@ def handleCallHierarchyOutgoingCalls (p : CallHierarchyOutgoingCallsParams)
let outgoingUsages := info.usages.filter fun usage => Id.run do
let some parentDecl := usage.parentDecl?
| return false
return itemData.name == parentDecl.name
return itemData.name == parentDecl.name.toName
let outgoingUsages := outgoingUsages.map (·.range)
if outgoingUsages.isEmpty then

View file

@ -27,6 +27,8 @@ class Input (ι : Type) (elem : outParam Type) (idx : outParam Type) [DecidableE
next : ιι
curr : ι → elem
hasNext : ι → Bool
next' (it : ι) : (hasNext it) → ι
curr' (it : ι) : (hasNext it) → elem
variable {α : Type} {ι : Type} {elem : Type} {idx : Type}
variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]
@ -34,21 +36,22 @@ variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]
instance : Inhabited (Parsec ι α) where
default := fun it => .error it ""
@[inline]
@[always_inline, inline]
protected def pure (a : α) : Parsec ι α := fun it =>
.success it a
@[inline]
@[always_inline, inline]
def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β := fun it =>
match f it with
| .success rem a => g a rem
| .error pos msg => .error pos msg
@[always_inline]
instance : Monad (Parsec ι) where
pure := Parsec.pure
bind := Parsec.bind
@[inline]
@[always_inline, inline]
def fail (msg : String) : Parsec ι α := fun it =>
.error it msg
@ -61,16 +64,17 @@ def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit
-- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
if Input.pos it = Input.pos rem then cerror () rem else .error rem err
@[inline]
@[always_inline, inline]
def orElse (p : Parsec ι α) (q : Unit → Parsec ι α) : Parsec ι α :=
tryCatch p pure q
@[inline]
@[always_inline, inline]
def attempt (p : Parsec ι α) : Parsec ι α := fun it =>
match p it with
| .success rem res => .success rem res
| .error _ err => .error it err
@[always_inline]
instance : Alternative (Parsec ι) where
failure := fail ""
orElse := orElse
@ -85,7 +89,7 @@ def eof : Parsec ι Unit := fun it =>
.success it ()
@[inline]
def eof? : Parsec ι Bool := fun it =>
def isEof : Parsec ι Bool := fun it =>
.success it (!Input.hasNext it)
@[specialize]
@ -102,8 +106,10 @@ def unexpectedEndOfInput := "unexpected end of input"
@[inline]
def any : Parsec ι elem := fun it =>
if Input.hasNext it then
.success (Input.next it) (Input.curr it)
if h : Input.hasNext it then
let c := Input.curr' it h
let it' := Input.next' it h
.success it' c
else
.error it unexpectedEndOfInput
@ -120,19 +126,31 @@ def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun it =>
@[inline]
def peek? : Parsec ι (Option elem) := fun it =>
if Input.hasNext it then
.success it (Input.curr it)
if h : Input.hasNext it then
.success it (Input.curr' it h)
else
.success it none
@[inline]
def peek! : Parsec ι elem := do
let some c ← peek? | fail unexpectedEndOfInput
return c
def peek! : Parsec ι elem := fun it =>
if h : Input.hasNext it then
.success it (Input.curr' it h)
else
.error it unexpectedEndOfInput
@[inline]
def peekD (default : elem) : Parsec ι elem := fun it =>
if h : Input.hasNext it then
.success it (Input.curr' it h)
else
.success it default
@[inline]
def skip : Parsec ι Unit := fun it =>
.success (Input.next it) ()
if h : Input.hasNext it then
.success (Input.next' it h) ()
else
.error it unexpectedEndOfInput
@[specialize]
partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String :=

View file

@ -17,6 +17,8 @@ instance : Input ByteArray.Iterator UInt8 Nat where
next it := it.next
curr it := it.curr
hasNext it := it.hasNext
next' it := it.next'
curr' it := it.curr'
abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α
@ -75,12 +77,12 @@ private partial def digitsCore (acc : Nat) : Parser Nat := fun it =>
.success it res
where
go (it : ByteArray.Iterator) (acc : Nat) : Nat × ByteArray.Iterator :=
if it.hasNext then
let candidate := it.curr
if h : it.hasNext then
let candidate := it.curr' h
if '0'.toUInt8 ≤ candidate ∧ candidate ≤ '9'.toUInt8 then
let digit := digitToNat candidate
let acc := acc * 10 + digit
go it.next acc
go (it.next' h) acc
else
(acc, it)
else
@ -107,10 +109,10 @@ def asciiLetter : Parser Char := attempt do
fail s!"ASCII letter expected"
private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator :=
if it.hasNext then
let b := it.curr
if h : it.hasNext then
let b := it.curr' h
if b = '\u0009'.toUInt8 b = '\u000a'.toUInt8 b = '\u000d'.toUInt8 b = '\u0020'.toUInt8 then
skipWs it.next
skipWs (it.next' h)
else
it
else

View file

@ -15,6 +15,8 @@ instance : Input String.Iterator Char String.Pos where
next it := it.next
curr it := it.curr
hasNext it := it.hasNext
next' it := it.next'
curr' it := it.curr'
abbrev Parser (α : Type) : Type := Parsec String.Iterator α
@ -59,12 +61,12 @@ private partial def digitsCore (acc : Nat) : Parser Nat := fun it =>
.success it res
where
go (it : String.Iterator) (acc : Nat) : (Nat × String.Iterator) :=
if it.hasNext then
if h : it.hasNext then
let candidate := it.curr
if '0' ≤ candidate ∧ candidate ≤ '9' then
let digit := digitToNat candidate
let acc := acc * 10 + digit
go it.next acc
go (it.next' h) acc
else
(acc, it)
else
@ -88,10 +90,10 @@ def asciiLetter : Parser Char := attempt do
if ('A' ≤ c ∧ c ≤ 'Z') ('a' ≤ c ∧ c ≤ 'z') then return c else fail s!"ASCII letter expected"
private partial def skipWs (it : String.Iterator) : String.Iterator :=
if it.hasNext then
let c := it.curr
if h : it.hasNext then
let c := it.curr' h
if c = '\u0009' c = '\u000a' c = '\u000d' c = '\u0020' then
skipWs it.next
skipWs (it.next' h)
else
it
else

View file

@ -132,7 +132,7 @@ where
if (← peek!) == 'c'.toUInt8 then
let _ ← many (satisfy (fun c => c != '\n'.toUInt8 && c != '\r'.toUInt8))
skipNewline
if ← eof? then
if ← isEof then
return actions
else
go actions
@ -140,7 +140,7 @@ where
let action ← parseAction
skipNewline
let actions := actions.push action
if ← eof? then
if ← isEof then
return actions
else
go actions

View file

@ -0,0 +1,54 @@
import Lean.Data.Lsp
def genModuleRefs (n : Nat) : IO Lean.Lsp.ModuleRefs := do
let someLoc : Lean.Lsp.RefInfo.Location := {
range := ⟨⟨333, 444⟩, ⟨444, 555⟩⟩
parentDecl? := some {
name := "A.Reasonably.Long.ParentDecl.Name.barfoo",
range := ⟨⟨1111, 2222⟩, ⟨3333, 4444⟩⟩
selectionRange := ⟨⟨5555, 6666⟩, ⟨7777, 8888⟩⟩
}
}
let mut someUsages : Array Lean.Lsp.RefInfo.Location := #[]
for _ in [0, 200] do
someUsages := someUsages.push someLoc
let someInfo : Lean.Lsp.RefInfo := {
definition? := someLoc
usages := someUsages
}
let mut refs : Lean.Lsp.ModuleRefs := .empty
for i in [0:n] do
let someIdent := Lean.Lsp.RefIdent.const s!"A.Reasonably.Long.Module.Name{i}" s!"A.Reasonably.Long.Declaration.Name.foobar{i}"
refs := refs.insert someIdent someInfo
return refs
@[noinline]
def compress (refs : Lean.Lsp.ModuleRefs) : IO String := do
return Lean.Json.compress (Lean.ToJson.toJson refs)
@[noinline]
def parse (s : String) : IO (Except String Lean.Json) := do
return Lean.Json.parse s
def main (args : List String) : IO Unit := do
let n := (args.get! 0).toNat!
let refs ← genModuleRefs n
let compressStartTime ← IO.monoMsNow
let s ← compress refs
let compressEndTime ← IO.monoMsNow
let compressTime : Float := (compressEndTime - compressStartTime).toFloat / 1000.0
IO.println s!"compress: {compressTime}"
let parseStartTime ← IO.monoMsNow
let r ← parse s
let parseEndTime ← IO.monoMsNow
let parseTime : Float := (parseEndTime - parseStartTime).toFloat / 1000.0
match r with
| .ok _ =>
IO.println s!"parse: {parseTime}"
| .error _ =>
IO.println s!"parse: {parseTime}"
IO.println "error"

View file

@ -6,8 +6,15 @@
# alternative config: use `perf stat` for extended properties
runner: perf_stat
perf_stat:
properties: ['wall-clock', 'task-clock', 'instructions', 'branches', 'branch-misses']
rusage_properties: ['maxrss']
properties:
[
"wall-clock",
"task-clock",
"instructions",
"branches",
"branch-misses",
]
rusage_properties: ["maxrss"]
run_config:
<<: *time
cmd: |
@ -230,6 +237,15 @@
run_config:
<<: *time
cmd: lean -Dlinter.all=false --run server_startup.lean
- attributes:
description: ilean roundtrip
tags: [fast]
run_config:
<<: *time
cmd: ./ilean_roundtrip.lean.out 200000
parse_output: true
build_config:
cmd: ./compile.sh ilean_roundtrip.lean
- attributes:
description: liasolver
tags: [fast, suite]

View file

@ -11,10 +11,10 @@ open Lsp
def identOf : Info → Option (RefIdent × Bool)
| .ofTermInfo ti => match ti.expr with
| .const n .. => some (.const `anonymous n, ti.isBinder)
| .fvar id .. => some (.fvar `anonymous id, ti.isBinder)
| .const n .. => some (.const (`anonymous).toString n.toString, ti.isBinder)
| .fvar id .. => some (.fvar (`anonymous).toString id.name.toString, ti.isBinder)
| _ => none
| .ofFieldInfo fi => some (.const `anonymous fi.projName, false)
| .ofFieldInfo fi => some (.const (`anonymous).toString fi.projName.toString, false)
| _ => none
def isConst (e : Expr) : Bool :=