feat: add useful functions in Parsec, add error variant and Std.Data.ByteSlice (#9599)

This PR adds the type `Std.Internal.Parsec.Error`, which contains the constructors `.eof` (useful for checking if parsing failed due to not having enough input and then retrying when more input arrives that is useful in the HTTP server) and `.other`, which describes other errors. It also adds documentation to many functions, along with some new functions to the `ByteArray` Parsec, such as `peekWhen?`, `octDigit`, `takeWhile`, `takeUntil`, `skipWhile`, and `skipUntil`.
2025-09-11 11:53:41 -03:00 · 2025-09-11 11:53:41 -03:00 · a966ce64ca
commit a966ce64ca
parent 5c88a2bf56
11 changed files with 1072 additions and 48 deletions
--- a/src/Lean/ErrorExplanation.lean
+++ b/src/Lean/ErrorExplanation.lean
@ -93,7 +93,7 @@ where
  upToWs (nonempty : Bool) : Parser String := fun it =>
    let it' := it.find fun c => c.isWhitespace
    if nonempty && it'.pos == it.pos then
-      .error it' "Expected a nonempty string"
+      .error it' (.other "Expected a nonempty string")
    else
      .success it' (it.extract it')

@ -180,7 +180,7 @@ private abbrev ValidationM := Parsec ValidationState
 private def ValidationM.run (p : ValidationM α) (input : String) : Except (Nat × String) α :=
  match p (.ofSource input) with
  | .success _ res => Except.ok res
-  | .error s err  => Except.error (s.getLineNumber, err)
+  | .error s err  => Except.error (s.getLineNumber, toString err)

 /--
 Matches `p` as many times as possible, followed by EOF. If `p` cannot be matched prior to the end
@ -290,7 +290,7 @@ where
  labelingExampleErrors {α} (header : String) (x : ValidationM α) : ValidationM α := fun s =>
    match x s with
    | res@(.success ..) => res
-    | .error s' msg => .error s' s!"Example '{header}' is malformed: {msg}"
+    | .error s' msg => .error s' (.other s!"Example '{header}' is malformed: {msg}")

  /--
  If `line` is a level-`level` header and, if `title?` is non-`none`, its title is `title?`,
--- a/src/Std/Data.lean
+++ b/src/Std/Data.lean
@ -30,5 +30,6 @@ public import Std.Data.TreeMap.Raw
 public import Std.Data.TreeSet.Raw

 public import Std.Data.Iterators
+public import Std.Data.ByteSlice

@[expose] public section
--- a/src/Std/Data/ByteSlice.lean
+++ b/src/Std/Data/ByteSlice.lean
@ -0,0 +1,338 @@
+/-
+Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Sofia Rodrigues
+-/
+module
+
+prelude
+public import Init.Core
+public import Init.GetElem
+public import Init.Data.ByteArray
+public import Init.Data.Slice.Basic
+public import Init.Data.Slice.Notation
+public import Init.Data.Range.Polymorphic.Nat
+
+/-!
+This module defines the `ByteSlice` structure. It's a modified version of the `SubArray` code,
+with some functions specific to `ByteSlice`.
+-/
+
+public section
+
+set_option linter.indexVariables true
+set_option linter.missingDocs true
+
+universe u v w
+
+/--
+Internal representation of `ByteSlice`, which is an abbreviation for `Slice ByteSliceData`.
+-/
+structure Std.Slice.Internal.ByteSliceData where
+  private mk ::
+
+  /--
+  The underlying byte array.
+  -/
+  byteArray : ByteArray
+
+  /--
+  The starting index of the region of interest (inclusive).
+  -/
+  start : Nat
+
+  /--
+  The ending index of the region of interest (exclusive).
+  -/
+  stop : Nat
+
+  /--
+  The starting index is no later than the ending index.
+
+  The ending index is exclusive. If the starting and ending indices are equal, then the byte slice is
+  empty.
+  -/
+  start_le_stop : start ≤ stop
+
+  /-- The stopping index is no later than the end of the byte array.
+
+  The ending index is exclusive. If it is equal to the size of the byte array, then the last byte of
+  the byte array is in the byte slice.
+  -/
+  stop_le_size_byteArray : stop ≤ byteArray.size
+
+open Std.Slice
+
+/--
+A region of some underlying byte array.
+
+A byte slice contains a byte array together with the start and end indices of a region of interest.
+Byte slices can be used to avoid copying or allocating space, while being more convenient than
+tracking the bounds by hand. The region of interest consists of every index that is both greater
+than or equal to `start` and strictly less than `stop`.
+-/
+abbrev ByteSlice := Std.Slice Internal.ByteSliceData
+
+instance : Self (Std.Slice Internal.ByteSliceData) ByteSlice where
+
+namespace ByteSlice
+
+@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.byteArray]
+def byteArray (xs : @& ByteSlice) : ByteArray :=
+  xs.internalRepresentation.byteArray
+
+@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.start]
+def start (xs : @& ByteSlice) : Nat :=
+  xs.internalRepresentation.start
+
+@[always_inline, inline, expose, inherit_doc Internal.ByteSliceData.stop]
+def stop (xs : @& ByteSlice) : Nat :=
+  xs.internalRepresentation.stop
+
+@[inherit_doc Internal.ByteSliceData.start_le_stop]
+theorem start_le_stop (xs : ByteSlice) : xs.start ≤ xs.stop :=
+  xs.internalRepresentation.start_le_stop
+
+@[inherit_doc Internal.ByteSliceData.stop_le_size_byteArray]
+theorem stop_le_size_byteArray (xs : ByteSlice) : xs.stop ≤ xs.byteArray.size :=
+  xs.internalRepresentation.stop_le_size_byteArray
+
+/--
+Computes the size of the byte slice.
+-/
+def size (s : ByteSlice) : Nat :=
+  s.stop - s.start
+
+theorem size_le_size_byteArray {s : ByteSlice} : s.size ≤ s.byteArray.size := by
+  let ⟨{byteArray, start, stop, start_le_stop, stop_le_size_byteArray}⟩ := s
+  simp only [size, ge_iff_le]
+  apply Nat.le_trans (Nat.sub_le stop start)
+  assumption
+
+/--
+Extracts a byte from the byte slice.
+
+The index is relative to the start of the byte slice, rather than the underlying byte array.
+-/
+def get (s : ByteSlice) (i : Fin s.size) : UInt8 :=
+  have : s.start + i.val < s.byteArray.size := by
+   apply Nat.lt_of_lt_of_le _ s.stop_le_size_byteArray
+   have := i.isLt
+   simp only [size] at this
+   rw [Nat.add_comm]
+   exact Nat.add_lt_of_lt_sub this
+  s.byteArray[s.start + i.val]
+
+instance : GetElem ByteSlice Nat UInt8 fun xs i => i < xs.size where
+  getElem xs i h := xs.get ⟨i, h⟩
+
+/--
+Extracts a byte from the byte slice, or returns a default value `v₀` when the index is out of
+bounds.
+
+The index is relative to the start and end of the byte slice, rather than the underlying byte array.
+-/
+@[inline] def getD (s : ByteSlice) (i : Nat) (v₀ : UInt8) : UInt8 :=
+  if h : i < s.size then s[i] else v₀
+
+/--
+Extracts a byte from the byte slice, or returns a default value when the index is out of bounds.
+
+The index is relative to the start and end of the byte slice, rather than the underlying byte array. The
+default value is 0.
+-/
+abbrev get! (s : ByteSlice) (i : Nat) : UInt8 :=
+  getD s i 0
+
+/--
+The empty byte slice.
+
+This empty byte slice is backed by an empty byte array.
+-/
+protected def empty : ByteSlice := ⟨{
+    byteArray := ByteArray.mk #[]
+    start := 0
+    stop := 0
+    start_le_stop := Nat.le_refl 0
+    stop_le_size_byteArray := Nat.le_refl 0
+  }⟩
+
+/--
+Creates a new ByteSlice of a ByteArray
+-/
+protected def ofByteArray (ba : ByteArray) : ByteSlice := ⟨{
+    byteArray := ba
+    start := 0
+    stop := ba.size
+    start_le_stop := Nat.zero_le _
+    stop_le_size_byteArray := Nat.le_refl _
+  }⟩
+
+instance : EmptyCollection (ByteSlice) :=
+  ⟨ByteSlice.empty⟩
+
+instance : Inhabited (ByteSlice) :=
+  ⟨{}⟩
+
+/--
+Converts a byte slice back to a byte array by copying the relevant portion.
+-/
+def toByteArray (s : ByteSlice) : ByteArray :=
+  s.byteArray.extract s.start (s.start + s.size)
+
+/--
+Comparison function
+-/
+@[extern "lean_byteslice_beq"]
+protected def beq (a b : ByteSlice) : Bool :=
+  a.toByteArray == b.toByteArray
+
+instance : BEq ByteSlice := ⟨ByteSlice.beq⟩
+
+/--
+Folds a monadic operation from right to left over the bytes in a byte slice.
+
+An accumulator of type `β` is constructed by starting with `init` and monadically combining each
+byte of the byte slice with the current accumulator value in turn, moving from the end to the
+start. The monad in question may permit early termination or repetition.
+
+Examples:
+```lean example
+#eval (ByteArray.mk #[1, 2, 3]).toByteSlice.foldrM (init := 0) fun x acc =>
+  some x.toNat + acc
+```
+```output
+some 6
+```
+-/
+@[inline]
+def foldrM {β : Type v} {m : Type v → Type w} [Monad m] (f : UInt8 → β → m β) (init : β) (as : ByteSlice) : m β :=
+  let rec loop (i : Nat) (acc : β) : m β :=
+    if h : i < as.size then do
+      let newAcc ← f as[as.size - 1 - i] acc
+      loop (i + 1) newAcc
+    else
+      pure acc
+  loop 0 init
+
+/--
+Runs a monadic action on each byte of a byte slice.
+
+The bytes are processed starting at the lowest index and moving up.
+-/
+@[inline]
+def forM {m : Type v → Type w} [Monad m] (f : UInt8 → m PUnit) (as : ByteSlice) : m PUnit :=
+  let rec loop (i : Nat) : m PUnit :=
+    if h : i < as.size then do
+      f as[i]
+      loop (i + 1)
+    else
+      pure ⟨⟩
+  loop 0
+
+/--
+Folds an operation from right to left over the bytes in a byte slice.
+
+An accumulator of type `β` is constructed by starting with `init` and combining each byte of the
+byte slice with the current accumulator value in turn, moving from the end to the start.
+
+Examples:
+ * `(ByteArray.mk #[1, 2, 3]).toByteSlice.foldr (·.toNat + ·) 0 = 6`
+ * `(ByteArray.mk #[1, 2, 3]).toByteSlice.popFront.foldr (·.toNat + ·) 0 = 5`
+-/
+@[inline]
+def foldr {β : Type v} (f : UInt8 → β → β) (init : β) (as : ByteSlice) : β :=
+  Id.run <| as.foldrM (pure <| f · ·) (init := init)
+
+/--
+Creates a sub-slice of the byte slice with the given bounds.
+
+If `start` or `stop` are not valid bounds for a sub-slice, then they are clamped to the slice's size.
+Additionally, the starting index is clamped to the ending index.
+
+The indices are relative to the current slice, not the underlying byte array.
+-/
+def slice (s : ByteSlice) (start : Nat := 0) (stop : Nat := s.size) : ByteSlice :=
+  let actualStart := s.start + min start s.size
+  let actualStop := s.start + min stop s.size
+  if h₂ : actualStop ≤ s.byteArray.size then
+    if h₁ : actualStart ≤ actualStop then
+      ⟨{ byteArray := s.byteArray,
+         start := actualStart,
+         stop := actualStop,
+         start_le_stop := h₁,
+         stop_le_size_byteArray := h₂ }⟩
+    else
+      ⟨{ byteArray := s.byteArray,
+         start := actualStop,
+         stop := actualStop,
+         start_le_stop := Nat.le_refl _,
+         stop_le_size_byteArray := h₂ }⟩
+  else
+    ⟨{ byteArray := s.byteArray,
+       start := s.start,
+       stop := s.start,
+       start_le_stop := Nat.le_refl _,
+       stop_le_size_byteArray := Nat.le_trans (Nat.le_refl _) (Nat.le_trans s.start_le_stop s.stop_le_size_byteArray) }⟩
+
+/--
+Checks if the byte slice contains a specific byte value.
+
+Returns `true` if any byte in the slice equals the given value, `false` otherwise.
+-/
+def contains (s : ByteSlice) (byte : UInt8) : Bool :=
+  let rec loop (i : Nat) : Bool :=
+    if h : i < s.size then
+      if s[i] == byte then true
+      else loop (i + 1)
+    else
+      false
+  loop 0
+
+end ByteSlice
+
+namespace ByteArray
+
+/--
+Returns a byte slice of a byte array, with the given bounds.
+
+If `start` or `stop` are not valid bounds for a byte slice, then they are clamped to byte array's size.
+Additionally, the starting index is clamped to the ending index.
+-/
+def toByteSlice (as : ByteArray) (start : Nat := 0) (stop : Nat := as.size) : ByteSlice :=
+  if h₂ : stop ≤ as.size then
+    if h₁ : start ≤ stop then
+      ⟨{ byteArray := as, start := start, stop := stop,
+         start_le_stop := h₁, stop_le_size_byteArray := h₂ }⟩
+    else
+      ⟨{ byteArray := as, start := stop, stop := stop,
+         start_le_stop := Nat.le_refl _, stop_le_size_byteArray := h₂ }⟩
+  else
+    if h₁ : start ≤ as.size then
+      ⟨{ byteArray := as,
+         start := start,
+         stop := as.size,
+         start_le_stop := h₁,
+         stop_le_size_byteArray := Nat.le_refl _ }⟩
+    else
+      ⟨{ byteArray := as,
+         start := as.size,
+         stop := as.size,
+         start_le_stop := Nat.le_refl _,
+         stop_le_size_byteArray := Nat.le_refl _ }⟩
+
+end ByteArray
+
+open Std Slice PRange
+
+variable {shape : RangeShape} {α : Type u}
+
+instance [ClosedOpenIntersection shape Nat] : Sliceable shape ByteArray Nat ByteSlice where
+  mkSlice xs range :=
+    let halfOpenRange := ClosedOpenIntersection.intersection range 0...<xs.size
+    (xs.toByteSlice halfOpenRange.lower halfOpenRange.upper)
+
+instance [ClosedOpenIntersection shape Nat] : Sliceable shape ByteSlice Nat ByteSlice where
+  mkSlice xs range :=
+    let halfOpenRange := ClosedOpenIntersection.intersection range 0...<xs.size
+    (xs.slice halfOpenRange.lower halfOpenRange.upper)
--- a/src/Std/Internal/Parsec/Basic.lean
+++ b/src/Std/Internal/Parsec/Basic.lean
@ -12,21 +12,60 @@ import Init.Data.String.Basic

 public section

-namespace Std.Internal
-
+namespace Std
+namespace Internal
 namespace Parsec

+/--
+Represents an error that can occur during parsing.
+-/
+inductive Error where
+  /--
+  Indicates that the parser reached the end of the input unexpectedly.
+  -/
+  | eof
+
+  /--
+  Represents any other kind of parsing error with an associated message.
+  -/
+  | other (s : String)
+deriving Repr
+
+instance : ToString Error where
+  toString
+    | .eof => "unexpected end of input"
+    | .other s => s
+
+/--
+The result of parsing some string.
+-/
 inductive ParseResult (α : Type) (ι : Type) where
+  /--
+  Parsing succeeded, returning the new position `pos` and the parsed result `res`.
+  -/
  | success (pos : ι) (res : α)
-  | error (pos : ι) (err : String)
-  deriving Repr
+
+  /--
+  Parsing failed, returning the position `pos` where the error occurred and the error `err`.
+  -/
+  | error (pos : ι) (err : Error)
+deriving Repr

 end Parsec

-@[expose] def Parsec (ι : Type) (α : Type) : Type := ι → Parsec.ParseResult α ι
+/--
+A `Parsec ι α` represents a parser that consumes input of type `ι` and, produces a
+`ParseResult` containing a value of type `α` (the result of parsing) and the remaining input.
+-/
+@[expose]
+def Parsec (ι : Type) (α : Type) : Type :=
+  ι → Parsec.ParseResult α ι

 namespace Parsec

+/--
+Interface for an input iterator with position tracking and lookahead support.
+-/
 class Input (ι : Type) (elem : outParam Type) (idx : outParam Type) [DecidableEq idx] [DecidableEq elem] where
  pos : ι → idx
  next : ι → ι
@ -39,27 +78,28 @@ variable {α : Type} {ι : Type} {elem : Type} {idx : Type}
 variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]

 instance : Inhabited (Parsec ι α) where
-  default := fun it => .error it ""
+  default := fun it => ParseResult.error it (.other "")

@[always_inline, inline]
 protected def pure (a : α) : Parsec ι α := fun it =>
  .success it a

@[always_inline, inline]
-def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β := fun it =>
+protected def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β := fun it =>
  match f it with
  | .success rem a => g a rem
  | .error pos msg => .error pos msg

-@[always_inline]
-instance : Monad (Parsec ι) where
-  pure := Parsec.pure
-  bind := Parsec.bind
-
+/--
+Parser that always fails with the given error message.
+-/
@[always_inline, inline]
 def fail (msg : String) : Parsec ι α := fun it =>
-  .error it msg
+  .error it (.other msg)

+/--
+Try `p`, then decide what to do based on success or failure without consuming input on failure.
+-/
@[inline]
 def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit → Parsec ι β)
    : Parsec ι β := fun it =>
@ -69,10 +109,21 @@ def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit
    -- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
    if Input.pos it = Input.pos rem then cerror () rem else .error rem err

+@[always_inline]
+instance : Monad (Parsec ι) where
+  pure := Parsec.pure
+  bind := Parsec.bind
+
+/--
+Try `p`, and if it fails without consuming input, run `q ()` instead.
+-/
@[always_inline, inline]
 def orElse (p : Parsec ι α) (q : Unit → Parsec ι α) : Parsec ι α :=
  tryCatch p pure q

+/--
+Attempt to parse with `p`, but don't consume input on failure.
+-/
@[always_inline, inline]
 def attempt (p : Parsec ι α) : Parsec ι α := fun it =>
  match p it with
@ -84,12 +135,13 @@ instance : Alternative (Parsec ι) where
  failure := fail ""
  orElse := orElse

-def expectedEndOfInput := "expected end of input"
-
+/--
+Succeeds only if input is at end-of-file.
+-/
@[inline]
 def eof : Parsec ι Unit := fun it =>
  if Input.hasNext it then
-    .error it expectedEndOfInput
+    .error it (.other "expected end of input")
  else
    .success it ()

@ -107,8 +159,9 @@ def many (p : Parsec ι α) : Parsec ι <| Array α := manyCore p #[]
@[inline]
 def many1 (p : Parsec ι α) : Parsec ι <| Array α := do manyCore p #[← p]

-def unexpectedEndOfInput := "unexpected end of input"
-
+/--
+Gets the next input element.
+-/
@[inline]
 def any : Parsec ι elem := fun it =>
  if h : Input.hasNext it then
@ -116,19 +169,28 @@ def any : Parsec ι elem := fun it =>
    let it' := Input.next' it h
    .success it' c
  else
-    .error it unexpectedEndOfInput
+    .error it .eof

+/--
+Checks if the next input element matches some condition.
+-/
@[inline]
 def satisfy (p : elem → Bool) : Parsec ι elem := attempt do
  let c ← any
  if p c then return c else fail "condition not satisfied"

+/--
+Fails if `p` succeeds, otherwise succeeds without consuming input.
+-/
@[inline]
 def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun it =>
  match p it with
-  | .success _ _ => .error it ""
+  | .success _ _ => .error it (.other "")
  | .error _ _ => .success it ()

+/--
+Peeks at the next element, returns `some` if exists else `none`, does not consume input.
+-/
@[inline]
 def peek? : Parsec ι (Option elem) := fun it =>
  if h : Input.hasNext it then
@ -136,13 +198,32 @@ def peek? : Parsec ι (Option elem) := fun it =>
  else
    .success it none

+/--
+Peeks at the next element, returns `some elem` if it satisfies `p`, else `none`. Does not consume input.
+-/
+@[inline]
+def peekWhen? (p : elem → Bool) : Parsec ι (Option elem) := do
+  let some data ← peek?
+    | return none
+
+  if p data then
+    return some data
+  else
+    return none
+
+/--
+Peeks at the next element, errors on EOF, does not consume input.
+-/
@[inline]
 def peek! : Parsec ι elem := fun it =>
  if h : Input.hasNext it then
    .success it (Input.curr' it h)
  else
-    .error it unexpectedEndOfInput
+    .error it .eof

+/--
+Peeks at the next element or returns a default if at EOF, does not consume input.
+-/
@[inline]
 def peekD (default : elem) : Parsec ι elem := fun it =>
  if h : Input.hasNext it then
@ -150,23 +231,37 @@ def peekD (default : elem) : Parsec ι elem := fun it =>
  else
    .success it default

+/--
+Consumes one element if available, otherwise errors on EOF.
+-/
@[inline]
 def skip : Parsec ι Unit := fun it =>
  if h : Input.hasNext it then
    .success (Input.next' it h) ()
  else
-    .error it unexpectedEndOfInput
+    .error it .eof

+/--
+Parses zero or more chars with `p`, accumulates into a string.
+-/
@[specialize]
 partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String :=
  tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc)

+/--
+Parses zero or more chars with `p` into a string.
+-/
@[inline]
-def manyChars (p : Parsec ι Char) : Parsec ι String := manyCharsCore p ""
+def manyChars (p : Parsec ι Char) : Parsec ι String := do
+  manyCharsCore p ""

+/--
+Parses one or more chars with `p` into a string, errors if none.
+-/
@[inline]
-def many1Chars (p : Parsec ι Char) : Parsec ι String := do manyCharsCore p (← p).toString
+def many1Chars (p : Parsec ι Char) : Parsec ι String := do
+  manyCharsCore p (← p).toString

 end Parsec
-
-end Std.Internal
+end Internal
+end Std
--- a/src/Std/Internal/Parsec/ByteArray.lean
+++ b/src/Std/Internal/Parsec/ByteArray.lean
@ -9,10 +9,12 @@ prelude
 public import Std.Internal.Parsec.Basic
 public import Init.Data.ByteArray.Basic
 public import Init.Data.String.Extra
+public import Std.Data.ByteSlice

 public section

-namespace Std.Internal
+namespace Std
+namespace Internal
 namespace Parsec
 namespace ByteArray

@ -24,29 +26,52 @@ instance : Input ByteArray.Iterator UInt8 Nat where
  next' it := it.next'
  curr' it := it.curr'

+
+/--
+`Parser α` is a parser that consumes a `ByteArray` input using a `ByteArray.Iterator` and returns a result of type `α`.
+-/
 abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α

+/--
+Run a `Parser` on a `ByteArray`, returns either the result or an error string with offset.
+-/
 protected def Parser.run (p : Parser α) (arr : ByteArray) : Except String α :=
  match p arr.iter with
  | .success _ res => Except.ok res
-  | .error it err  => Except.error s!"offset {repr it.pos}: {err}"
+  | .error it err => Except.error s!"offset {repr it.pos}: {err}"

+/--
+Parse a single byte equal to `b`, fails if different.
+-/
@[inline]
 def pbyte (b : UInt8) : Parser UInt8 := attempt do
  if (← any) = b then pure b else fail s!"expected: '{b}'"

+/--
+Skip a single byte equal to `b`, fails if different.
+-/
@[inline]
-def skipByte (b : UInt8) : Parser Unit := pbyte b *> pure ()
+def skipByte (b : UInt8) : Parser Unit :=
+  pbyte b *> pure ()

+/--
+Skip a sequence of bytes equal to the given `ByteArray`.
+-/
 def skipBytes (arr : ByteArray) : Parser Unit := do
  for b in arr do
    skipByte b

+/--
+Parse a string by matching its UTF-8 bytes, returns the string on success.
+-/
@[inline]
 def pstring (s : String) : Parser String := do
  skipBytes s.toUTF8
  return s

+/--
+Skip a string by matching its UTF-8 bytes.
+-/
@[inline]
 def skipString (s : String) : Parser Unit := pstring s *> pure ()

@ -63,14 +88,24 @@ Skip a `Char` that can be represented in 1 byte. If `c` uses more than 1 byte it
@[inline]
 def skipByteChar (c : Char) : Parser Unit := skipByte c.toUInt8

+/--
+Parse an ASCII digit `0-9` as a `Char`.
+-/
@[inline]
 def digit : Parser Char := attempt do
  let b ← any
  if '0'.toUInt8 ≤ b ∧ b ≤ '9'.toUInt8 then return Char.ofUInt8 b else fail s!"digit expected"

+/--
+Convert a byte representing `'0'..'9'` to a `Nat`.
+-/
@[inline]
-private def digitToNat (b : UInt8) : Nat := (b - '0'.toUInt8).toNat
+private def digitToNat (b : UInt8) : Nat :=
+  (b - '0'.toUInt8).toNat

+/--
+Parse zero or more ASCII digits into a `Nat`, continuing until non-digit or EOF.
+-/
@[inline]
 private partial def digitsCore (acc : Nat) : Parser Nat := fun it =>
  /-
@ -92,11 +127,17 @@ where
    else
      (acc, it)

+/--
+Parse one or more ASCII digits into a `Nat`.
+-/
@[inline]
 def digits : Parser Nat := do
  let d ← digit
  digitsCore (digitToNat d.toUInt8)

+/--
+Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`.
+-/
@[inline]
 def hexDigit : Parser Char := attempt do
  let b ← any
@ -104,6 +145,20 @@ def hexDigit : Parser Char := attempt do
   ∨ ('a'.toUInt8 ≤ b ∧ b ≤ 'f'.toUInt8)
   ∨ ('A'.toUInt8 ≤ b ∧ b ≤ 'F'.toUInt8) then return Char.ofUInt8 b else fail s!"hex digit expected"

+/--
+Parse an octal digit `0-7` as a `Char`.
+-/
+@[inline]
+def octDigit : Parser Char := attempt do
+  let b ← any
+  if '0'.toUInt8 ≤ b ∧ b ≤ '7'.toUInt8 then
+    return Char.ofUInt8 b
+  else
+    fail s!"octal digit expected"
+
+/--
+Parse an ASCII letter `a-z` or `A-Z` as a `Char`.
+-/
@[inline]
 def asciiLetter : Parser Char := attempt do
  let b ← any
@ -122,17 +177,125 @@ private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator :=
  else
   it

+/--
+Skip whitespace: tabs, newlines, carriage returns, and spaces.
+-/
@[inline]
 def ws : Parser Unit := fun it =>
  .success (skipWs it) ()

-def take (n : Nat) : Parser ByteArray := fun it =>
-  let subarr := it.array.extract it.idx (it.idx + n)
-  if subarr.size != n then
-    .error it s!"expected: {n} bytes"
+/--
+Parse `n` bytes from the input into a `ByteSlice`, errors if not enough bytes.
+-/
+def take (n : Nat) : Parser ByteSlice := fun it =>
+  if it.remainingBytes < n then
+    .error it .eof
  else
-    .success (it.forward n) subarr
+    .success (it.forward n) (it.array[it.idx...(it.idx+n)])
+
+/--
+Parses while a predicate is satisfied.
+-/
+@[inline]
+partial def takeWhile (pred : UInt8 → Bool) : Parser ByteSlice :=
+  fun it =>
+    let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator :=
+      if ¬iter.hasNext then (count, iter)
+      else if pred iter.curr then findEnd (count + 1) iter.next
+      else (count, iter)
+
+    let (length, newIt) := findEnd 0 it
+    .success newIt (it.array[it.idx...(it.idx + length)])
+
+/--
+Parses until a predicate is satisfied (exclusive).
+-/
+@[inline]
+def takeUntil (pred : UInt8 → Bool) : Parser ByteSlice :=
+  takeWhile (fun b => ¬pred b)
+
+/--
+Skips while a predicate is satisfied.
+-/
+@[inline]
+partial def skipWhile (pred : UInt8 → Bool) : Parser Unit :=
+  fun it =>
+    let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : ByteArray.Iterator :=
+      if ¬iter.hasNext then iter
+      else if pred iter.curr then findEnd (count + 1) iter.next
+      else iter
+
+    .success (findEnd 0 it) ()
+
+/--
+Skips until a predicate is satisfied.
+-/
+@[inline]
+def skipUntil (pred : UInt8 → Bool) : Parser Unit :=
+  skipWhile (fun b => ¬pred b)
+
+/--
+Parses while a predicate is satisfied, up to a given limit.
+-/
+@[inline]
+partial def takeWhileUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice :=
+  fun it =>
+    let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator :=
+      if count ≥ limit then (count, iter)
+      else if ¬iter.hasNext then (count, iter)
+      else if pred iter.curr then findEnd (count + 1) iter.next
+      else (count, iter)
+
+    let (length, newIt) := findEnd 0 it
+    .success newIt (it.array[it.idx...(it.idx + length)])
+
+/--
+Parses while a predicate is satisfied, up to a given limit, requiring at least one byte.
+-/
+@[inline]
+def takeWhileUpTo1 (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice :=
+  fun it =>
+    let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : Nat × ByteArray.Iterator :=
+      if count ≥ limit then (count, iter)
+      else if ¬iter.hasNext then (count, iter)
+      else if pred iter.curr then findEnd (count + 1) iter.next
+      else (count, iter)
+
+    let (length, newIt) := findEnd 0 it
+    if length = 0 then
+      .error it (if newIt.atEnd then .eof else .other "expected at least one char")
+    else
+      .success newIt (it.array[it.idx...(it.idx + length)])
+
+/--
+Parses until a predicate is satisfied (exclusive), up to a given limit.
+-/
+@[inline]
+def takeUntilUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser ByteSlice :=
+  takeWhileUpTo (fun b => ¬pred b) limit
+
+/--
+Skips while a predicate is satisfied, up to a given limit.
+-/
+@[inline]
+partial def skipWhileUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser Unit :=
+  fun it =>
+    let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : ByteArray.Iterator :=
+      if count ≥ limit then iter
+      else if ¬iter.hasNext then iter
+      else if pred iter.curr then findEnd (count + 1) iter.next
+      else iter
+
+    .success (findEnd 0 it) ()
+
+/--
+Skips until a predicate is satisfied, up to a given limit.
+-/
+@[inline]
+def skipUntilUpTo (pred : UInt8 → Bool) (limit : Nat) : Parser Unit :=
+  skipWhileUpTo (fun b => ¬pred b) limit

 end ByteArray
 end Parsec
-end Std.Internal
+end Internal
+end Std
--- a/src/Std/Internal/Parsec/String.lean
+++ b/src/Std/Internal/Parsec/String.lean
@ -23,36 +23,59 @@ instance : Input String.Iterator Char String.Pos where
  next' it := it.next'
  curr' it := it.curr'

+/--
+`Parser α` is a parser that consumes a `String` input using a `String.Iterator` and returns a result of type `α`.
+-/
 abbrev Parser (α : Type) : Type := Parsec String.Iterator α

+/--
+Run a `Parser` on a `String`, returns either the result or an error string with offset.
+-/
 protected def Parser.run (p : Parser α) (s : String) : Except String α :=
  match p s.mkIterator with
  | .success _ res => Except.ok res
-  | .error it err  => Except.error s!"offset {repr it.i.byteIdx}: {err}"
+  | .error it err => Except.error s!"offset {repr it.pos.byteIdx}: {err}"

-/-- Parses the given string. -/
+/--
+Parses the given string.
+-/
 def pstring (s : String) : Parser String := fun it =>
  let substr := it.extract (it.forward s.length)
  if substr = s then
    .success (it.forward s.length) substr
  else
-    .error it s!"expected: {s}"
+    .error it (.other s!"expected: {s}")

+/--
+Skips the given string.
+-/
@[inline]
 def skipString (s : String) : Parser Unit := pstring s *> pure ()

+/--
+Parses the given char.
+-/
@[inline]
 def pchar (c : Char) : Parser Char := attempt do
  if (← any) = c then pure c else fail s!"expected: '{c}'"

+/--
+Skips the given char.
+-/
@[inline]
 def skipChar (c : Char) : Parser Unit := pchar c *> pure ()

+/--
+Parse an ASCII digit `0-9` as a `Char`.
+-/
@[inline]
 def digit : Parser Char := attempt do
  let c ← any
  if '0' ≤ c ∧ c ≤ '9' then return c else fail s!"digit expected"

+/--
+Convert a byte representing `'0'..'9'` to a `Nat`.
+-/
@[inline]
 private def digitToNat (b : Char) : Nat := b.toNat - '0'.toNat

@ -77,11 +100,17 @@ where
    else
      (acc, it)

+/--
+Parse one or more ASCII digits into a `Nat`.
+-/
@[inline]
 def digits : Parser Nat := do
  let d ← digit
  digitsCore (digitToNat d)

+/--
+Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`.
+-/
@[inline]
 def hexDigit : Parser Char := attempt do
  let c ← any
@ -89,6 +118,9 @@ def hexDigit : Parser Char := attempt do
   ∨ ('a' ≤ c ∧ c ≤ 'f')
   ∨ ('A' ≤ c ∧ c ≤ 'F') then return c else fail s!"hex digit expected"

+/--
+Parse an ASCII letter `a-z` or `A-Z` as a `Char`.
+-/
@[inline]
 def asciiLetter : Parser Char := attempt do
  let c ← any
@ -104,14 +136,20 @@ private partial def skipWs (it : String.Iterator) : String.Iterator :=
  else
   it

+/--
+Skip whitespace: tabs, newlines, carriage returns, and spaces.
+-/
@[inline]
 def ws : Parser Unit := fun it =>
  .success (skipWs it) ()

+/--
+Takes a fixed amount of chars from the iterator.
+-/
 def take (n : Nat) : Parser String := fun it =>
  let substr := it.extract (it.forward n)
  if substr.length != n then
-    .error it s!"expected: {n} codepoints"
+    .error it .eof
  else
    .success (it.forward n) substr

--- a/src/Std/Time/Zoned/Database/TzIf.lean
+++ b/src/Std/Time/Zoned/Database/TzIf.lean
@ -205,10 +205,10 @@ private def manyN (n : Nat) (p : Parser α) : Parser (Array α) := do
    result := result.push x
  return result

-private def pu64 : Parser UInt64 := ByteArray.toUInt64LE! <$> take 8
-private def pi64 : Parser Int64 := toInt64 <$> take 8
-private def pu32 : Parser UInt32 := toUInt32 <$> take 4
-private def pi32 : Parser Int32 := toInt32 <$> take 4
+private def pu64 : Parser UInt64 := ByteArray.toUInt64LE! <$> ByteSlice.toByteArray <$> take 8
+private def pi64 : Parser Int64 := toInt64 <$> ByteSlice.toByteArray <$> take 8
+private def pu32 : Parser UInt32 := toUInt32 <$> ByteSlice.toByteArray <$> take 4
+private def pi32 : Parser Int32 := toInt32 <$> ByteSlice.toByteArray <$> take 4
 private def pu8 : Parser UInt8 := any
 private def pbool : Parser Bool := (· != 0) <$> pu8

--- a/src/runtime/CMakeLists.txt
+++ b/src/runtime/CMakeLists.txt
@ -1,6 +1,6 @@
 set(RUNTIME_OBJS debug.cpp thread.cpp mpz.cpp utf8.cpp
 object.cpp apply.cpp exception.cpp interrupt.cpp memory.cpp
-stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp
+stackinfo.cpp compact.cpp init_module.cpp io.cpp hash.cpp byteslice.cpp
 platform.cpp alloc.cpp allocprof.cpp sharecommon.cpp stack_overflow.cpp
 process.cpp object_ref.cpp mpn.cpp mutex.cpp libuv.cpp uv/net_addr.cpp uv/event_loop.cpp
 uv/timer.cpp uv/tcp.cpp uv/udp.cpp uv/dns.cpp uv/system.cpp)
--- a/src/runtime/byteslice.cpp
+++ b/src/runtime/byteslice.cpp
@ -0,0 +1,38 @@
+/*
+Copyright (c) 2025 Lean FRO. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+
+Author: Sofia Rodrigues
+*/
+#include <lean/lean.h>
+#include "runtime/byteslice.h"
+#include "runtime/io.h"
+#include "runtime/object.h"
+#include "runtime/thread.h"
+
+namespace lean {
+
+extern "C" LEAN_EXPORT uint8_t lean_byteslice_beq(b_obj_arg a, b_obj_arg b) {
+    if (a == b) { return true; }
+
+    lean_object* bytearray_a = lean_ctor_get(a, 0);
+    size_t start_a = lean_unbox(lean_ctor_get(a, 1));
+    size_t end_a = lean_unbox(lean_ctor_get(a, 2));
+
+    lean_object* bytearray_b = lean_ctor_get(b, 0);
+    size_t start_b = lean_unbox(lean_ctor_get(b, 1));
+    size_t end_b = lean_unbox(lean_ctor_get(b, 2));
+
+    size_t size_a = end_a - start_a;
+    size_t size_b = end_b - start_b;
+
+    if (size_a != size_b) { return false; }
+
+    if (size_a == 0) { return true; }
+
+    const uint8_t* ptr_a = lean_sarray_cptr(bytearray_a) + start_a;
+    const uint8_t* ptr_b = lean_sarray_cptr(bytearray_b) + start_b;
+
+    return memcmp(ptr_a, ptr_b, size_a) == 0;
+}
+}
--- a/src/runtime/byteslice.h
+++ b/src/runtime/byteslice.h
@ -0,0 +1,8 @@
+/*
+Copyright (c) 2025 Lean FRO. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+
+Author: Sofia Rodrigues
+*/
+#pragma once
+#include <string.h>
--- a/tests/lean/run/byteslice.lean
+++ b/tests/lean/run/byteslice.lean
@ -0,0 +1,343 @@
+import Std.Data.ByteSlice
+
+-- Test data
+def ba := ByteArray.mk #[1,2,3,4,5,6,7,8,9,10]
+def bb := ByteArray.mk #[1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10]
+def emptyArray := ByteArray.mk #[]
+def singleByte := ByteArray.mk #[42]
+
+-- Basic slicing tests (existing)
+/--
+info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+-/
+#guard_msgs in
+#eval bb[10...20] |>.toByteArray
+
+/--
+info: [3, 4]
+-/
+#guard_msgs in
+#eval bb[10...20][2...4] |>.toByteArray
+
+/--
+info: 4
+-/
+#guard_msgs in
+#eval bb[10...20][2...4] |>.get! 1
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval bb[10...20][2...4] |>.contains 4
+
+-- Size function tests
+/--
+info: 10
+-/
+#guard_msgs in
+#eval ba.toByteSlice.size
+
+/--
+info: 0
+-/
+#guard_msgs in
+#eval emptyArray.toByteSlice.size
+
+/--
+info: 1
+-/
+#guard_msgs in
+#eval singleByte.toByteSlice.size
+
+/--
+info: 5
+-/
+#guard_msgs in
+#eval ba[2...7].size
+
+-- Element access tests
+/--
+info: 1
+-/
+#guard_msgs in
+#eval ba[0...5][0]!
+
+/--
+info: 5
+-/
+#guard_msgs in
+#eval ba[0...5][4]!
+
+-- getD (get with default) tests
+/--
+info: 3
+-/
+#guard_msgs in
+#eval ba.toByteSlice.getD 2 99
+
+/--
+info: 99
+-/
+#guard_msgs in
+#eval ba.toByteSlice.getD 15 99
+
+/--
+info: 255
+-/
+#guard_msgs in
+#eval emptyArray.toByteSlice.getD 0 255
+
+-- get! tests (with bounds checking)
+/--
+info: 0
+-/
+#guard_msgs in
+#eval ba.toByteSlice.get! 15
+
+/--
+info: 0
+-/
+#guard_msgs in
+#eval emptyArray.toByteSlice.get! 0
+
+/--
+info: 42
+-/
+#guard_msgs in
+#eval singleByte.toByteSlice.get! 0
+
+-- Contains function tests
+/--
+info: true
+-/
+#guard_msgs in
+#eval ba.toByteSlice.contains 5
+
+/--
+info: false
+-/
+#guard_msgs in
+#eval ba.toByteSlice.contains 15
+
+/--
+info: false
+-/
+#guard_msgs in
+#eval emptyArray.toByteSlice.contains 1
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval bb[5...15].contains 10
+
+-- Equality tests
+/--
+info: true
+-/
+#guard_msgs in
+#eval ba[0...5] == ba[0...5]
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval ba[0...3].toByteArray == bb[0...3].toByteArray
+
+/--
+info: false
+-/
+#guard_msgs in
+#eval ba[0...3] == ba[1...4]
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval ByteSlice.empty == emptyArray.toByteSlice
+
+-- Edge case: empty slices
+/--
+info: 0
+-/
+#guard_msgs in
+#eval ByteSlice.empty.size
+
+/--
+info: []
+-/
+#guard_msgs in
+#eval ByteSlice.empty.toByteArray
+
+/--
+info: false
+-/
+#guard_msgs in
+#eval ByteSlice.empty.contains 0
+
+-- Edge case: slice bounds clamping
+/--
+info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+-/
+#guard_msgs in
+#eval ba[0...100].toByteArray
+
+/--
+info: []
+-/
+#guard_msgs in
+#eval ba[100...200].toByteArray
+
+/--
+info: [6, 7, 8, 9, 10]
+-/
+#guard_msgs in
+#eval ba[5...100].toByteArray
+
+-- Foldr function tests
+/--
+info: 55
+-/
+#guard_msgs in
+#eval ba.toByteSlice.foldr (fun b acc => b.toNat + acc) 0
+
+/--
+info: 0
+-/
+#guard_msgs in
+#eval ByteSlice.empty.foldr (fun b acc => b.toNat + acc) 0
+
+/--
+info: 42
+-/
+#guard_msgs in
+#eval singleByte.toByteSlice.foldr (fun b acc => b.toNat + acc) 0
+
+-- Nested slicing with element access
+
+/--
+info: 7
+-/
+#guard_msgs in
+#eval bb[0...10][5...8][1]!
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval bb[0...10][5...8].contains 6
+
+/--
+info: false
+-/
+#guard_msgs in
+#eval bb[0...10][5...8].contains 5
+
+-- Size consistency tests
+/--
+info: 3
+-/
+#guard_msgs in
+#eval bb[0...10][5...8].size
+
+/--
+info: 5
+-/
+#guard_msgs in
+#eval bb[5...15][0...5].size
+
+-- Boundary testing with single elements
+/--
+info: [10]
+-/
+#guard_msgs in
+#eval ba[9...10].toByteArray
+
+/--
+info: 1
+-/
+#guard_msgs in
+#eval ba[9...10].size
+
+/--
+info: 10
+-/
+#guard_msgs in
+#eval ba[9...10][0]!
+
+-- Out of bounds slice handling
+/--
+info: []
+-/
+#guard_msgs in
+#eval ba[15...20].toByteArray
+
+/--
+info: 0
+-/
+#guard_msgs in
+#eval ba[15...20].size
+
+/--
+info: []
+-/
+#guard_msgs in
+#eval ba.toByteSlice.slice 8 3 |>.toByteArray
+
+/--
+info: 0
+-/
+#guard_msgs in
+#eval ba.toByteSlice.slice 8 3 |>.size
+
+/--
+info: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+-/
+#guard_msgs in
+#eval ba.toByteSlice.slice 0 10 |>.toByteArray
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval bb[0...10] == bb[10...20]
+
+/--
+info: [1, 2, 3]
+-/
+#guard_msgs in
+#eval bb[10...13].toByteArray
+
+-- Testing with different default values in getD
+/--
+info: 128
+-/
+#guard_msgs in
+#eval ba[0...3].getD 10 128
+
+/--
+info: 2
+-/
+#guard_msgs in
+#eval ba[0...3].getD 1 128
+
+def maxByteArray := ByteArray.mk #[255, 0, 128, 1]
+
+/--
+info: true
+-/
+#guard_msgs in
+#eval maxByteArray.toByteSlice.contains 255
+
+/--
+info: 384
+-/
+#guard_msgs in
+#eval maxByteArray.toByteSlice.foldr (fun b acc => b.toNat + acc) 0
+
+/--
+info: [255, 0]
+-/
+#guard_msgs in
+#eval maxByteArray[0...2].toByteArray