diff --git a/src/Lean/ErrorExplanation.lean b/src/Lean/ErrorExplanation.lean index 8756c95479..b6a9b693fa 100644 --- a/src/Lean/ErrorExplanation.lean +++ b/src/Lean/ErrorExplanation.lean @@ -93,7 +93,7 @@ where upToWs (nonempty : Bool) : Parser String := fun it => let it' := it.find fun c => c.isWhitespace if nonempty && it'.pos == it.pos then - .error it' "Expected a nonempty string" + .error it' (.other "Expected a nonempty string") else .success it' (it.extract it') @@ -180,7 +180,7 @@ private abbrev ValidationM := Parsec ValidationState private def ValidationM.run (p : ValidationM α) (input : String) : Except (Nat × String) α := match p (.ofSource input) with | .success _ res => Except.ok res - | .error s err => Except.error (s.getLineNumber, err) + | .error s err => Except.error (s.getLineNumber, toString err) /-- Matches `p` as many times as possible, followed by EOF. If `p` cannot be matched prior to the end @@ -290,7 +290,7 @@ where labelingExampleErrors {α} (header : String) (x : ValidationM α) : ValidationM α := fun s => match x s with | res@(.success ..) => res - | .error s' msg => .error s' s!"Example '{header}' is malformed: {msg}" + | .error s' msg => .error s' (.other s!"Example '{header}' is malformed: {msg}") /-- If `line` is a level-`level` header and, if `title?` is non-`none`, its title is `title?`, diff --git a/src/Std/Data.lean b/src/Std/Data.lean index 8d288ad6d8..d602fdda4d 100644 --- a/src/Std/Data.lean +++ b/src/Std/Data.lean @@ -30,5 +30,6 @@ public import Std.Data.TreeMap.Raw public import Std.Data.TreeSet.Raw public import Std.Data.Iterators +public import Std.Data.ByteSlice @[expose] public section diff --git a/src/Std/Data/ByteSlice.lean b/src/Std/Data/ByteSlice.lean new file mode 100644 index 0000000000..f3eb5aaf09 --- /dev/null +++ b/src/Std/Data/ByteSlice.lean @@ -0,0 +1,338 @@ +/- +Copyright (c) 2025 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Sofia Rodrigues +-/ +module + +prelude +public import Init.Core +public import Init.GetElem +public import Init.Data.ByteArray +public import Init.Data.Slice.Basic +public import Init.Data.Slice.Notation +public import Init.Data.Range.Polymorphic.Nat + +/-! +This module defines the `ByteSlice` structure. It's a modified version of the `SubArray` code, +with some functions specific to `ByteSlice`. +-/ + +public section + +set_option linter.indexVariables true +set_option linter.missingDocs true + +universe u v w + +/-- +Internal representation of `ByteSlice`, which is an abbreviation for `Slice ByteSliceData`. +-/ +structure Std.Slice.Internal.ByteSliceData where + private mk :: + + /-- + The underlying byte array. + -/ + byteArray : ByteArray + + /-- + The starting index of the region of interest (inclusive). + -/ + start : Nat + + /-- + The ending index of the region of interest (exclusive). + -/ + stop : Nat + + /-- + The starting index is no later than the ending index. + + The ending index is exclusive. If the starting and ending indices are equal, then the byte slice is + empty. + -/ + start_le_stop : start ≤ stop + + /-- The stopping index is no later than the end of the byte array. + + The ending index is exclusive. If it is equal to the size of the byte array, then the last byte of + the byte array is in the byte slice. + -/ + stop_le_size_byteArray : stop ≤ byteArray.size + +open Std.Slice + +/-- +A region of some underlying byte array. + +A byte slice contains a byte array together with the start and end indices of a region of interest. +Byte slices can be used to avoid copying or allocating space, while being more convenient than +tracking the bounds by hand. The region of interest consists of every index that is both greater +than or equal to `start` and strictly less than `stop`. +-/ +abbrev ByteSlice := Std.Slice Internal.ByteSliceData + +instance : Self (Std.Slice Internal.ByteSliceData) ByteSlice where + +namespace ByteSlice + +@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.byteArray] +def byteArray (xs : @& ByteSlice) : ByteArray := + xs.internalRepresentation.byteArray + +@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.start] +def start (xs : @& ByteSlice) : Nat := + xs.internalRepresentation.start + +@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.stop] +def stop (xs : @& ByteSlice) : Nat := + xs.internalRepresentation.stop + +@[inherit_doc Internal.ByteSliceData.start_le_stop] +theorem start_le_stop (xs : ByteSlice) : xs.start ≤ xs.stop := + xs.internalRepresentation.start_le_stop + +@[inherit_doc Internal.ByteSliceData.stop_le_size_byteArray] +theorem stop_le_size_byteArray (xs : ByteSlice) : xs.stop ≤ xs.byteArray.size := + xs.internalRepresentation.stop_le_size_byteArray + +/-- +Computes the size of the byte slice. +-/ +def size (s : ByteSlice) : Nat := + s.stop - s.start + +theorem size_le_size_byteArray {s : ByteSlice} : s.size ≤ s.byteArray.size := by + let ⟨{byteArray, start, stop, start_le_stop, stop_le_size_byteArray}⟩ := s + simp only [size, ge_iff_le] + apply Nat.le_trans (Nat.sub_le stop start) + assumption + +/-- +Extracts a byte from the byte slice. + +The index is relative to the start of the byte slice, rather than the underlying byte array. +-/ +def get (s : ByteSlice) (i : Fin s.size) : UInt8 := + have : s.start + i.val < s.byteArray.size := by + apply Nat.lt_of_lt_of_le _ s.stop_le_size_byteArray + have := i.isLt + simp only [size] at this + rw [Nat.add_comm] + exact Nat.add_lt_of_lt_sub this + s.byteArray[s.start + i.val] + +instance : GetElem ByteSlice Nat UInt8 fun xs i => i < xs.size where + getElem xs i h := xs.get ⟨i, h⟩ + +/-- +Extracts a byte from the byte slice, or returns a default value `v₀` when the index is out of +bounds. + +The index is relative to the start and end of the byte slice, rather than the underlying byte array. +-/ +@[inline] def getD (s : ByteSlice) (i : Nat) (v₀ : UInt8) : UInt8 := + if h : i < s.size then s[i] else v₀ + +/-- +Extracts a byte from the byte slice, or returns a default value when the index is out of bounds. + +The index is relative to the start and end of the byte slice, rather than the underlying byte array. The +default value is 0. +-/ +abbrev get! (s : ByteSlice) (i : Nat) : UInt8 := + getD s i 0 + +/-- +The empty byte slice. + +This empty byte slice is backed by an empty byte array. +-/ +protected def empty : ByteSlice := ⟨{ + byteArray := ByteArray.mk #[] + start := 0 + stop := 0 + start_le_stop := Nat.le_refl 0 + stop_le_size_byteArray := Nat.le_refl 0 + }⟩ + +/-- +Creates a new ByteSlice of a ByteArray +-/ +protected def ofByteArray (ba : ByteArray) : ByteSlice := ⟨{ + byteArray := ba + start := 0 + stop := ba.size + start_le_stop := Nat.zero_le _ + stop_le_size_byteArray := Nat.le_refl _ + }⟩ + +instance : EmptyCollection (ByteSlice) := + ⟨ByteSlice.empty⟩ + +instance : Inhabited (ByteSlice) := + ⟨{}⟩ + +/-- +Converts a byte slice back to a byte array by copying the relevant portion. +-/ +def toByteArray (s : ByteSlice) : ByteArray := + s.byteArray.extract s.start (s.start + s.size) + +/-- +Comparison function +-/ +@[extern "lean_byteslice_beq"] +protected def beq (a b : ByteSlice) : Bool := + a.toByteArray == b.toByteArray + +instance : BEq ByteSlice := ⟨ByteSlice.beq⟩ + +/-- +Folds a monadic operation from right to left over the bytes in a byte slice. + +An accumulator of type `β` is constructed by starting with `init` and monadically combining each +byte of the byte slice with the current accumulator value in turn, moving from the end to the +start. The monad in question may permit early termination or repetition. + +Examples: +```lean example +#eval (ByteArray.mk #[1, 2, 3]).toByteSlice.foldrM (init := 0) fun x acc => + some x.toNat + acc +``` +```output +some 6 +``` +-/ +@[inline] +def foldrM {β : Type v} {m : Type v → Type w} [Monad m] (f : UInt8 → β → m β) (init : β) (as : ByteSlice) : m β := + let rec loop (i : Nat) (acc : β) : m β := + if h : i < as.size then do + let newAcc ← f as[as.size - 1 - i] acc + loop (i + 1) newAcc + else + pure acc + loop 0 init + +/-- +Runs a monadic action on each byte of a byte slice. + +The bytes are processed starting at the lowest index and moving up. +-/ +@[inline] +def forM {m : Type v → Type w} [Monad m] (f : UInt8 → m PUnit) (as : ByteSlice) : m PUnit := + let rec loop (i : Nat) : m PUnit := + if h : i < as.size then do + f as[i] + loop (i + 1) + else + pure ⟨⟩ + loop 0 + +/-- +Folds an operation from right to left over the bytes in a byte slice. + +An accumulator of type `β` is constructed by starting with `init` and combining each byte of the +byte slice with the current accumulator value in turn, moving from the end to the start. + +Examples: + * `(ByteArray.mk #[1, 2, 3]).toByteSlice.foldr (·.toNat + ·) 0 = 6` + * `(ByteArray.mk #[1, 2, 3]).toByteSlice.popFront.foldr (·.toNat + ·) 0 = 5` +-/ +@[inline] +def foldr {β : Type v} (f : UInt8 → β → β) (init : β) (as : ByteSlice) : β := + Id.run <| as.foldrM (pure <| f · ·) (init := init) + +/-- +Creates a sub-slice of the byte slice with the given bounds. + +If `start` or `stop` are not valid bounds for a sub-slice, then they are clamped to the slice's size. +Additionally, the starting index is clamped to the ending index. + +The indices are relative to the current slice, not the underlying byte array. +-/ +def slice (s : ByteSlice) (start : Nat := 0) (stop : Nat := s.size) : ByteSlice := + let actualStart := s.start + min start s.size + let actualStop := s.start + min stop s.size + if h₂ : actualStop ≤ s.byteArray.size then + if h₁ : actualStart ≤ actualStop then + ⟨{ byteArray := s.byteArray, + start := actualStart, + stop := actualStop, + start_le_stop := h₁, + stop_le_size_byteArray := h₂ }⟩ + else + ⟨{ byteArray := s.byteArray, + start := actualStop, + stop := actualStop, + start_le_stop := Nat.le_refl _, + stop_le_size_byteArray := h₂ }⟩ + else + ⟨{ byteArray := s.byteArray, + start := s.start, + stop := s.start, + start_le_stop := Nat.le_refl _, + stop_le_size_byteArray := Nat.le_trans (Nat.le_refl _) (Nat.le_trans s.start_le_stop s.stop_le_size_byteArray) }⟩ + +/-- +Checks if the byte slice contains a specific byte value. + +Returns `true` if any byte in the slice equals the given value, `false` otherwise. +-/ +def contains (s : ByteSlice) (byte : UInt8) : Bool := + let rec loop (i : Nat) : Bool := + if h : i < s.size then + if s[i] == byte then true + else loop (i + 1) + else + false + loop 0 + +end ByteSlice + +namespace ByteArray + +/-- +Returns a byte slice of a byte array, with the given bounds. + +If `start` or `stop` are not valid bounds for a byte slice, then they are clamped to byte array's size. +Additionally, the starting index is clamped to the ending index. +-/ +def toByteSlice (as : ByteArray) (start : Nat := 0) (stop : Nat := as.size) : ByteSlice := + if h₂ : stop ≤ as.size then + if h₁ : start ≤ stop then + ⟨{ byteArray := as, start := start, stop := stop, + start_le_stop := h₁, stop_le_size_byteArray := h₂ }⟩ + else + ⟨{ byteArray := as, start := stop, stop := stop, + start_le_stop := Nat.le_refl _, stop_le_size_byteArray := h₂ }⟩ + else + if h₁ : start ≤ as.size then + ⟨{ byteArray := as, + start := start, + stop := as.size, + start_le_stop := h₁, + stop_le_size_byteArray := Nat.le_refl _ }⟩ + else + ⟨{ byteArray := as, + start := as.size, + stop := as.size, + start_le_stop := Nat.le_refl _, + stop_le_size_byteArray := Nat.le_refl _ }⟩ + +end ByteArray + +open Std Slice PRange + +variable {shape : RangeShape} {α : Type u} + +instance [ClosedOpenIntersection shape Nat] : Sliceable shape ByteArray Nat ByteSlice where + mkSlice xs range := + let halfOpenRange := ClosedOpenIntersection.intersection range 0... "unexpected end of input" + | .other s => s + +/-- +The result of parsing some string. +-/ inductive ParseResult (α : Type) (ι : Type) where + /-- + Parsing succeeded, returning the new position `pos` and the parsed result `res`. + -/ | success (pos : ι) (res : α) - | error (pos : ι) (err : String) - deriving Repr + + /-- + Parsing failed, returning the position `pos` where the error occurred and the error `err`. + -/ + | error (pos : ι) (err : Error) +deriving Repr end Parsec -@[expose] def Parsec (ι : Type) (α : Type) : Type := ι → Parsec.ParseResult α ι +/-- +A `Parsec ι α` represents a parser that consumes input of type `ι` and, produces a +`ParseResult` containing a value of type `α` (the result of parsing) and the remaining input. +-/ +@[expose] +def Parsec (ι : Type) (α : Type) : Type := + ι → Parsec.ParseResult α ι namespace Parsec +/-- +Interface for an input iterator with position tracking and lookahead support. +-/ class Input (ι : Type) (elem : outParam Type) (idx : outParam Type) [DecidableEq idx] [DecidableEq elem] where pos : ι → idx next : ι → ι @@ -39,27 +78,28 @@ variable {α : Type} {ι : Type} {elem : Type} {idx : Type} variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx] instance : Inhabited (Parsec ι α) where - default := fun it => .error it "" + default := fun it => ParseResult.error it (.other "") @[always_inline, inline] protected def pure (a : α) : Parsec ι α := fun it => .success it a @[always_inline, inline] -def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β := fun it => +protected def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β := fun it => match f it with | .success rem a => g a rem | .error pos msg => .error pos msg -@[always_inline] -instance : Monad (Parsec ι) where - pure := Parsec.pure - bind := Parsec.bind - +/-- +Parser that always fails with the given error message. +-/ @[always_inline, inline] def fail (msg : String) : Parsec ι α := fun it => - .error it msg + .error it (.other msg) +/-- +Try `p`, then decide what to do based on success or failure without consuming input on failure. +-/ @[inline] def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit → Parsec ι β) : Parsec ι β := fun it => @@ -69,10 +109,21 @@ def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit -- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`. if Input.pos it = Input.pos rem then cerror () rem else .error rem err +@[always_inline] +instance : Monad (Parsec ι) where + pure := Parsec.pure + bind := Parsec.bind + +/-- +Try `p`, and if it fails without consuming input, run `q ()` instead. +-/ @[always_inline, inline] def orElse (p : Parsec ι α) (q : Unit → Parsec ι α) : Parsec ι α := tryCatch p pure q +/-- +Attempt to parse with `p`, but don't consume input on failure. +-/ @[always_inline, inline] def attempt (p : Parsec ι α) : Parsec ι α := fun it => match p it with @@ -84,12 +135,13 @@ instance : Alternative (Parsec ι) where failure := fail "" orElse := orElse -def expectedEndOfInput := "expected end of input" - +/-- +Succeeds only if input is at end-of-file. +-/ @[inline] def eof : Parsec ι Unit := fun it => if Input.hasNext it then - .error it expectedEndOfInput + .error it (.other "expected end of input") else .success it () @@ -107,8 +159,9 @@ def many (p : Parsec ι α) : Parsec ι <| Array α := manyCore p #[] @[inline] def many1 (p : Parsec ι α) : Parsec ι <| Array α := do manyCore p #[← p] -def unexpectedEndOfInput := "unexpected end of input" - +/-- +Gets the next input element. +-/ @[inline] def any : Parsec ι elem := fun it => if h : Input.hasNext it then @@ -116,19 +169,28 @@ def any : Parsec ι elem := fun it => let it' := Input.next' it h .success it' c else - .error it unexpectedEndOfInput + .error it .eof +/-- +Checks if the next input element matches some condition. +-/ @[inline] def satisfy (p : elem → Bool) : Parsec ι elem := attempt do let c ← any if p c then return c else fail "condition not satisfied" +/-- +Fails if `p` succeeds, otherwise succeeds without consuming input. +-/ @[inline] def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun it => match p it with - | .success _ _ => .error it "" + | .success _ _ => .error it (.other "") | .error _ _ => .success it () +/-- +Peeks at the next element, returns `some` if exists else `none`, does not consume input. +-/ @[inline] def peek? : Parsec ι (Option elem) := fun it => if h : Input.hasNext it then @@ -136,13 +198,32 @@ def peek? : Parsec ι (Option elem) := fun it => else .success it none +/-- +Peeks at the next element, returns `some elem` if it satisfies `p`, else `none`. Does not consume input. +-/ +@[inline] +def peekWhen? (p : elem → Bool) : Parsec ι (Option elem) := do + let some data ← peek? + | return none + + if p data then + return some data + else + return none + +/-- +Peeks at the next element, errors on EOF, does not consume input. +-/ @[inline] def peek! : Parsec ι elem := fun it => if h : Input.hasNext it then .success it (Input.curr' it h) else - .error it unexpectedEndOfInput + .error it .eof +/-- +Peeks at the next element or returns a default if at EOF, does not consume input. +-/ @[inline] def peekD (default : elem) : Parsec ι elem := fun it => if h : Input.hasNext it then @@ -150,23 +231,37 @@ def peekD (default : elem) : Parsec ι elem := fun it => else .success it default +/-- +Consumes one element if available, otherwise errors on EOF. +-/ @[inline] def skip : Parsec ι Unit := fun it => if h : Input.hasNext it then .success (Input.next' it h) () else - .error it unexpectedEndOfInput + .error it .eof +/-- +Parses zero or more chars with `p`, accumulates into a string. +-/ @[specialize] partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String := tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc) +/-- +Parses zero or more chars with `p` into a string. +-/ @[inline] -def manyChars (p : Parsec ι Char) : Parsec ι String := manyCharsCore p "" +def manyChars (p : Parsec ι Char) : Parsec ι String := do + manyCharsCore p "" +/-- +Parses one or more chars with `p` into a string, errors if none. +-/ @[inline] -def many1Chars (p : Parsec ι Char) : Parsec ι String := do manyCharsCore p (← p).toString +def many1Chars (p : Parsec ι Char) : Parsec ι String := do + manyCharsCore p (← p).toString end Parsec - -end Std.Internal +end Internal +end Std diff --git a/src/Std/Internal/Parsec/ByteArray.lean b/src/Std/Internal/Parsec/ByteArray.lean index c9dad94368..5c46a79661 100644 --- a/src/Std/Internal/Parsec/ByteArray.lean +++ b/src/Std/Internal/Parsec/ByteArray.lean @@ -9,10 +9,12 @@ prelude public import Std.Internal.Parsec.Basic public import Init.Data.ByteArray.Basic public import Init.Data.String.Extra +public import Std.Data.ByteSlice public section -namespace Std.Internal +namespace Std +namespace Internal namespace Parsec namespace ByteArray @@ -24,29 +26,52 @@ instance : Input ByteArray.Iterator UInt8 Nat where next' it := it.next' curr' it := it.curr' + +/-- +`Parser α` is a parser that consumes a `ByteArray` input using a `ByteArray.Iterator` and returns a result of type `α`. +-/ abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α +/-- +Run a `Parser` on a `ByteArray`, returns either the result or an error string with offset. +-/ protected def Parser.run (p : Parser α) (arr : ByteArray) : Except String α := match p arr.iter with | .success _ res => Except.ok res - | .error it err => Except.error s!"offset {repr it.pos}: {err}" + | .error it err => Except.error s!"offset {repr it.pos}: {err}" +/-- +Parse a single byte equal to `b`, fails if different. +-/ @[inline] def pbyte (b : UInt8) : Parser UInt8 := attempt do if (← any) = b then pure b else fail s!"expected: '{b}'" +/-- +Skip a single byte equal to `b`, fails if different. +-/ @[inline] -def skipByte (b : UInt8) : Parser Unit := pbyte b *> pure () +def skipByte (b : UInt8) : Parser Unit := + pbyte b *> pure () +/-- +Skip a sequence of bytes equal to the given `ByteArray`. +-/ def skipBytes (arr : ByteArray) : Parser Unit := do for b in arr do skipByte b +/-- +Parse a string by matching its UTF-8 bytes, returns the string on success. +-/ @[inline] def pstring (s : String) : Parser String := do skipBytes s.toUTF8 return s +/-- +Skip a string by matching its UTF-8 bytes. +-/ @[inline] def skipString (s : String) : Parser Unit := pstring s *> pure () @@ -63,14 +88,24 @@ Skip a `Char` that can be represented in 1 byte. If `c` uses more than 1 byte it @[inline] def skipByteChar (c : Char) : Parser Unit := skipByte c.toUInt8 +/-- +Parse an ASCII digit `0-9` as a `Char`. +-/ @[inline] def digit : Parser Char := attempt do let b ← any if '0'.toUInt8 ≤ b ∧ b ≤ '9'.toUInt8 then return Char.ofUInt8 b else fail s!"digit expected" +/-- +Convert a byte representing `'0'..'9'` to a `Nat`. +-/ @[inline] -private def digitToNat (b : UInt8) : Nat := (b - '0'.toUInt8).toNat +private def digitToNat (b : UInt8) : Nat := + (b - '0'.toUInt8).toNat +/-- +Parse zero or more ASCII digits into a `Nat`, continuing until non-digit or EOF. +-/ @[inline] private partial def digitsCore (acc : Nat) : Parser Nat := fun it => /- @@ -92,11 +127,17 @@ where else (acc, it) +/-- +Parse one or more ASCII digits into a `Nat`. +-/ @[inline] def digits : Parser Nat := do let d ← digit digitsCore (digitToNat d.toUInt8) +/-- +Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`. +-/ @[inline] def hexDigit : Parser Char := attempt do let b ← any @@ -104,6 +145,20 @@ def hexDigit : Parser Char := attempt do ∨ ('a'.toUInt8 ≤ b ∧ b ≤ 'f'.toUInt8) ∨ ('A'.toUInt8 ≤ b ∧ b ≤ 'F'.toUInt8) then return Char.ofUInt8 b else fail s!"hex digit expected" +/-- +Parse an octal digit `0-7` as a `Char`. +-/ +@[inline] +def octDigit : Parser Char := attempt do + let b ← any + if '0'.toUInt8 ≤ b ∧ b ≤ '7'.toUInt8 then + return Char.ofUInt8 b + else + fail s!"octal digit expected" + +/-- +Parse an ASCII letter `a-z` or `A-Z` as a `Char`. +-/ @[inline] def asciiLetter : Parser Char := attempt do let b ← any @@ -122,17 +177,125 @@ private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator := else it +/-- +Skip whitespace: tabs, newlines, carriage returns, and spaces. +-/ @[inline] def ws : Parser Unit := fun it => .success (skipWs it) () -def take (n : Nat) : Parser ByteArray := fun it => - let subarr := it.array.extract it.idx (it.idx + n) - if subarr.size != n then - .error it s!"expected: {n} bytes" +/-- +Parse `n` bytes from the input into a `ByteSlice`, errors if not enough bytes. +-/ +def take (n : Nat) : Parser ByteSlice := fun it => + if it.remainingBytes < n then + .error it .eof else - .success (it.forward n) subarr + .success (it.forward n) (it.array[it.idx...(it.idx+n)]) + +/-- +Parses while a predicate is satisfied. +-/ +@[inline] +partial def takeWhile (pred : UInt8 → Bool) : Parser ByteSlice := + fun it => + let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator := + if ¬iter.hasNext then (count, iter) + else if pred iter.curr then findEnd (count + 1) iter.next + else (count, iter) + + let (length, newIt) := findEnd 0 it + .success newIt (it.array[it.idx...(it.idx + length)]) + +/-- +Parses until a predicate is satisfied (exclusive). +-/ +@[inline] +def takeUntil (pred : UInt8 → Bool) : Parser ByteSlice := + takeWhile (fun b => ¬pred b) + +/-- +Skips while a predicate is satisfied. +-/ +@[inline] +partial def skipWhile (pred : UInt8 → Bool) : Parser Unit := + fun it => + let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : ByteArray.Iterator := + if ¬iter.hasNext then iter + else if pred iter.curr then findEnd (count + 1) iter.next + else iter + + .success (findEnd 0 it) () + +/-- +Skips until a predicate is satisfied. +-/ +@[inline] +def skipUntil (pred : UInt8 → Bool) : Parser Unit := + skipWhile (fun b => ¬pred b) + +/-- +Parses while a predicate is satisfied, up to a given limit. +-/ +@[inline] +partial def takeWhileUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice := + fun it => + let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator := + if count ≥ limit then (count, iter) + else if ¬iter.hasNext then (count, iter) + else if pred iter.curr then findEnd (count + 1) iter.next + else (count, iter) + + let (length, newIt) := findEnd 0 it + .success newIt (it.array[it.idx...(it.idx + length)]) + +/-- +Parses while a predicate is satisfied, up to a given limit, requiring at least one byte. +-/ +@[inline] +def takeWhileUpTo1 (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice := + fun it => + let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator := + if count ≥ limit then (count, iter) + else if ¬iter.hasNext then (count, iter) + else if pred iter.curr then findEnd (count + 1) iter.next + else (count, iter) + + let (length, newIt) := findEnd 0 it + if length = 0 then + .error it (if newIt.atEnd then .eof else .other "expected at least one char") + else + .success newIt (it.array[it.idx...(it.idx + length)]) + +/-- +Parses until a predicate is satisfied (exclusive), up to a given limit. +-/ +@[inline] +def takeUntilUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice := + takeWhileUpTo (fun b => ¬pred b) limit + +/-- +Skips while a predicate is satisfied, up to a given limit. +-/ +@[inline] +partial def skipWhileUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser Unit := + fun it => + let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : ByteArray.Iterator := + if count ≥ limit then iter + else if ¬iter.hasNext then iter + else if pred iter.curr then findEnd (count + 1) iter.next + else iter + + .success (findEnd 0 it) () + +/-- +Skips until a predicate is satisfied, up to a given limit. +-/ +@[inline] +def skipUntilUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser Unit := + skipWhileUpTo (fun b => ¬pred b) limit end ByteArray end Parsec -end Std.Internal +end Internal +end Std diff --git a/src/Std/Internal/Parsec/String.lean b/src/Std/Internal/Parsec/String.lean index b74c4657c7..ba03f3745e 100644 --- a/src/Std/Internal/Parsec/String.lean +++ b/src/Std/Internal/Parsec/String.lean @@ -23,36 +23,59 @@ instance : Input String.Iterator Char String.Pos where next' it := it.next' curr' it := it.curr' +/-- +`Parser α` is a parser that consumes a `String` input using a `String.Iterator` and returns a result of type `α`. +-/ abbrev Parser (α : Type) : Type := Parsec String.Iterator α +/-- +Run a `Parser` on a `String`, returns either the result or an error string with offset. +-/ protected def Parser.run (p : Parser α) (s : String) : Except String α := match p s.mkIterator with | .success _ res => Except.ok res - | .error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}" + | .error it err => Except.error s!"offset {repr it.pos.byteIdx}: {err}" -/-- Parses the given string. -/ +/-- +Parses the given string. +-/ def pstring (s : String) : Parser String := fun it => let substr := it.extract (it.forward s.length) if substr = s then .success (it.forward s.length) substr else - .error it s!"expected: {s}" + .error it (.other s!"expected: {s}") +/-- +Skips the given string. +-/ @[inline] def skipString (s : String) : Parser Unit := pstring s *> pure () +/-- +Parses the given char. +-/ @[inline] def pchar (c : Char) : Parser Char := attempt do if (← any) = c then pure c else fail s!"expected: '{c}'" +/-- +Skips the given char. +-/ @[inline] def skipChar (c : Char) : Parser Unit := pchar c *> pure () +/-- +Parse an ASCII digit `0-9` as a `Char`. +-/ @[inline] def digit : Parser Char := attempt do let c ← any if '0' ≤ c ∧ c ≤ '9' then return c else fail s!"digit expected" +/-- +Convert a byte representing `'0'..'9'` to a `Nat`. +-/ @[inline] private def digitToNat (b : Char) : Nat := b.toNat - '0'.toNat @@ -77,11 +100,17 @@ where else (acc, it) +/-- +Parse one or more ASCII digits into a `Nat`. +-/ @[inline] def digits : Parser Nat := do let d ← digit digitsCore (digitToNat d) +/-- +Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`. +-/ @[inline] def hexDigit : Parser Char := attempt do let c ← any @@ -89,6 +118,9 @@ def hexDigit : Parser Char := attempt do ∨ ('a' ≤ c ∧ c ≤ 'f') ∨ ('A' ≤ c ∧ c ≤ 'F') then return c else fail s!"hex digit expected" +/-- +Parse an ASCII letter `a-z` or `A-Z` as a `Char`. +-/ @[inline] def asciiLetter : Parser Char := attempt do let c ← any @@ -104,14 +136,20 @@ private partial def skipWs (it : String.Iterator) : String.Iterator := else it +/-- +Skip whitespace: tabs, newlines, carriage returns, and spaces. +-/ @[inline] def ws : Parser Unit := fun it => .success (skipWs it) () +/-- +Takes a fixed amount of chars from the iterator. +-/ def take (n : Nat) : Parser String := fun it => let substr := it.extract (it.forward n) if substr.length != n then - .error it s!"expected: {n} codepoints" + .error it .eof else .success (it.forward n) substr diff --git a/src/Std/Time/Zoned/Database/TzIf.lean b/src/Std/Time/Zoned/Database/TzIf.lean index bbe56bb98e..9b6dbe93e4 100644 --- a/src/Std/Time/Zoned/Database/TzIf.lean +++ b/src/Std/Time/Zoned/Database/TzIf.lean @@ -205,10 +205,10 @@ private def manyN (n : Nat) (p : Parser α) : Parser (Array α) := do result := result.push x return result -private def pu64 : Parser UInt64 := ByteArray.toUInt64LE! <$> take 8 -private def pi64 : Parser Int64 := toInt64 <$> take 8 -private def pu32 : Parser UInt32 := toUInt32 <$> take 4 -private def pi32 : Parser Int32 := toInt32 <$> take 4 +private def pu64 : Parser UInt64 := ByteArray.toUInt64LE! <$> ByteSlice.toByteArray <$> take 8 +private def pi64 : Parser Int64 := toInt64 <$> ByteSlice.toByteArray <$> take 8 +private def pu32 : Parser UInt32 := toUInt32 <$> ByteSlice.toByteArray <$> take 4 +private def pi32 : Parser Int32 := toInt32 <$> ByteSlice.toByteArray <$> take 4 private def pu8 : Parser UInt8 := any private def pbool : Parser Bool := (· != 0) <$> pu8 diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 0185369b01..45a7f511c6 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -1,6 +1,6 @@ set(RUNTIME_OBJS debug.cpp thread.cpp mpz.cpp utf8.cpp object.cpp apply.cpp exception.cpp interrupt.cpp memory.cpp -stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp +stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp byteslice.cpp platform.cpp alloc.cpp allocprof.cpp sharecommon.cpp stack_overflow.cpp process.cpp object_ref.cpp mpn.cpp mutex.cpp libuv.cpp uv/net_addr.cpp uv/event_loop.cpp uv/timer.cpp uv/tcp.cpp uv/udp.cpp uv/dns.cpp uv/system.cpp) diff --git a/src/runtime/byteslice.cpp b/src/runtime/byteslice.cpp new file mode 100644 index 0000000000..824aca093e --- /dev/null +++ b/src/runtime/byteslice.cpp @@ -0,0 +1,38 @@ +/* +Copyright (c) 2025 Lean FRO. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Sofia Rodrigues +*/ +#include +#include "runtime/byteslice.h" +#include "runtime/io.h" +#include "runtime/object.h" +#include "runtime/thread.h" + +namespace lean { + +extern "C" LEAN_EXPORT uint8_t lean_byteslice_beq(b_obj_arg a, b_obj_arg b) { + if (a == b) { return true; } + + lean_object* bytearray_a = lean_ctor_get(a, 0); + size_t start_a = lean_unbox(lean_ctor_get(a, 1)); + size_t end_a = lean_unbox(lean_ctor_get(a, 2)); + + lean_object* bytearray_b = lean_ctor_get(b, 0); + size_t start_b = lean_unbox(lean_ctor_get(b, 1)); + size_t end_b = lean_unbox(lean_ctor_get(b, 2)); + + size_t size_a = end_a - start_a; + size_t size_b = end_b - start_b; + + if (size_a != size_b) { return false; } + + if (size_a == 0) { return true; } + + const uint8_t* ptr_a = lean_sarray_cptr(bytearray_a) + start_a; + const uint8_t* ptr_b = lean_sarray_cptr(bytearray_b) + start_b; + + return memcmp(ptr_a, ptr_b, size_a) == 0; +} +} diff --git a/src/runtime/byteslice.h b/src/runtime/byteslice.h new file mode 100644 index 0000000000..63796cc747 --- /dev/null +++ b/src/runtime/byteslice.h @@ -0,0 +1,8 @@ +/* +Copyright (c) 2025 Lean FRO. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Sofia Rodrigues +*/ +#pragma once +#include diff --git a/tests/lean/run/byteslice.lean b/tests/lean/run/byteslice.lean new file mode 100644 index 0000000000..11bb93122c --- /dev/null +++ b/tests/lean/run/byteslice.lean @@ -0,0 +1,343 @@ +import Std.Data.ByteSlice + +-- Test data +def ba := ByteArray.mk #[1,2,3,4,5,6,7,8,9,10] +def bb := ByteArray.mk #[1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10] +def emptyArray := ByteArray.mk #[] +def singleByte := ByteArray.mk #[42] + +-- Basic slicing tests (existing) +/-- +info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +-/ +#guard_msgs in +#eval bb[10...20] |>.toByteArray + +/-- +info: [3, 4] +-/ +#guard_msgs in +#eval bb[10...20][2...4] |>.toByteArray + +/-- +info: 4 +-/ +#guard_msgs in +#eval bb[10...20][2...4] |>.get! 1 + +/-- +info: true +-/ +#guard_msgs in +#eval bb[10...20][2...4] |>.contains 4 + +-- Size function tests +/-- +info: 10 +-/ +#guard_msgs in +#eval ba.toByteSlice.size + +/-- +info: 0 +-/ +#guard_msgs in +#eval emptyArray.toByteSlice.size + +/-- +info: 1 +-/ +#guard_msgs in +#eval singleByte.toByteSlice.size + +/-- +info: 5 +-/ +#guard_msgs in +#eval ba[2...7].size + +-- Element access tests +/-- +info: 1 +-/ +#guard_msgs in +#eval ba[0...5][0]! + +/-- +info: 5 +-/ +#guard_msgs in +#eval ba[0...5][4]! + +-- getD (get with default) tests +/-- +info: 3 +-/ +#guard_msgs in +#eval ba.toByteSlice.getD 2 99 + +/-- +info: 99 +-/ +#guard_msgs in +#eval ba.toByteSlice.getD 15 99 + +/-- +info: 255 +-/ +#guard_msgs in +#eval emptyArray.toByteSlice.getD 0 255 + +-- get! tests (with bounds checking) +/-- +info: 0 +-/ +#guard_msgs in +#eval ba.toByteSlice.get! 15 + +/-- +info: 0 +-/ +#guard_msgs in +#eval emptyArray.toByteSlice.get! 0 + +/-- +info: 42 +-/ +#guard_msgs in +#eval singleByte.toByteSlice.get! 0 + +-- Contains function tests +/-- +info: true +-/ +#guard_msgs in +#eval ba.toByteSlice.contains 5 + +/-- +info: false +-/ +#guard_msgs in +#eval ba.toByteSlice.contains 15 + +/-- +info: false +-/ +#guard_msgs in +#eval emptyArray.toByteSlice.contains 1 + +/-- +info: true +-/ +#guard_msgs in +#eval bb[5...15].contains 10 + +-- Equality tests +/-- +info: true +-/ +#guard_msgs in +#eval ba[0...5] == ba[0...5] + +/-- +info: true +-/ +#guard_msgs in +#eval ba[0...3].toByteArray == bb[0...3].toByteArray + +/-- +info: false +-/ +#guard_msgs in +#eval ba[0...3] == ba[1...4] + +/-- +info: true +-/ +#guard_msgs in +#eval ByteSlice.empty == emptyArray.toByteSlice + +-- Edge case: empty slices +/-- +info: 0 +-/ +#guard_msgs in +#eval ByteSlice.empty.size + +/-- +info: [] +-/ +#guard_msgs in +#eval ByteSlice.empty.toByteArray + +/-- +info: false +-/ +#guard_msgs in +#eval ByteSlice.empty.contains 0 + +-- Edge case: slice bounds clamping +/-- +info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +-/ +#guard_msgs in +#eval ba[0...100].toByteArray + +/-- +info: [] +-/ +#guard_msgs in +#eval ba[100...200].toByteArray + +/-- +info: [6, 7, 8, 9, 10] +-/ +#guard_msgs in +#eval ba[5...100].toByteArray + +-- Foldr function tests +/-- +info: 55 +-/ +#guard_msgs in +#eval ba.toByteSlice.foldr (fun b acc => b.toNat + acc) 0 + +/-- +info: 0 +-/ +#guard_msgs in +#eval ByteSlice.empty.foldr (fun b acc => b.toNat + acc) 0 + +/-- +info: 42 +-/ +#guard_msgs in +#eval singleByte.toByteSlice.foldr (fun b acc => b.toNat + acc) 0 + +-- Nested slicing with element access + +/-- +info: 7 +-/ +#guard_msgs in +#eval bb[0...10][5...8][1]! + +/-- +info: true +-/ +#guard_msgs in +#eval bb[0...10][5...8].contains 6 + +/-- +info: false +-/ +#guard_msgs in +#eval bb[0...10][5...8].contains 5 + +-- Size consistency tests +/-- +info: 3 +-/ +#guard_msgs in +#eval bb[0...10][5...8].size + +/-- +info: 5 +-/ +#guard_msgs in +#eval bb[5...15][0...5].size + +-- Boundary testing with single elements +/-- +info: [10] +-/ +#guard_msgs in +#eval ba[9...10].toByteArray + +/-- +info: 1 +-/ +#guard_msgs in +#eval ba[9...10].size + +/-- +info: 10 +-/ +#guard_msgs in +#eval ba[9...10][0]! + +-- Out of bounds slice handling +/-- +info: [] +-/ +#guard_msgs in +#eval ba[15...20].toByteArray + +/-- +info: 0 +-/ +#guard_msgs in +#eval ba[15...20].size + +/-- +info: [] +-/ +#guard_msgs in +#eval ba.toByteSlice.slice 8 3 |>.toByteArray + +/-- +info: 0 +-/ +#guard_msgs in +#eval ba.toByteSlice.slice 8 3 |>.size + +/-- +info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +-/ +#guard_msgs in +#eval ba.toByteSlice.slice 0 10 |>.toByteArray + +/-- +info: true +-/ +#guard_msgs in +#eval bb[0...10] == bb[10...20] + +/-- +info: [1, 2, 3] +-/ +#guard_msgs in +#eval bb[10...13].toByteArray + +-- Testing with different default values in getD +/-- +info: 128 +-/ +#guard_msgs in +#eval ba[0...3].getD 10 128 + +/-- +info: 2 +-/ +#guard_msgs in +#eval ba[0...3].getD 1 128 + +def maxByteArray := ByteArray.mk #[255, 0, 128, 1] + +/-- +info: true +-/ +#guard_msgs in +#eval maxByteArray.toByteSlice.contains 255 + +/-- +info: 384 +-/ +#guard_msgs in +#eval maxByteArray.toByteSlice.foldr (fun b acc => b.toNat + acc) 0 + +/-- +info: [255, 0] +-/ +#guard_msgs in +#eval maxByteArray[0...2].toByteArray