lean4-htt/src/Std/Http/Internal/Char.lean
Sebastian Ullrich 7f5fac9d9f
feat: add warn.redundantExpose for redundant @[expose]/@[no_expose] attributes (#13359)
This PR adds a `linter.redundantExpose` option (default `true`) that
warns when `@[expose]` or `@[no_expose]` attributes have no effect:

- `@[expose]` on `abbrev` (always exposed) or non-Prop `instance`
(always exposed)
- `@[expose]` on a `def` inside an `@[expose] section` (already exposed
by the section)
- `@[expose]`/`@[no_expose]` in a non-`module` file (no module system)
- `@[no_expose]` on a declaration that wouldn't be exposed by default

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 10:33:58 +00:00

313 lines
8.1 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sofia Rodrigues
-/
module
prelude
public import Init.Data.Char
public import Init.Data.String.Basic
public import Init.Data.Int
public import Init.Grind
@[expose]
public section
/-!
# HTTP Character Predicates
This module provides shared character validation predicates used across the HTTP library.
All predicates in this module are ASCII-only by design (`isAscii c` where applicable), and
intentionally exclude `obs-text` and all non-ASCII code points.
-/
-- Every predicate in this file is declared inside `Std.Http.Internal.Char`.
namespace Std.Http.Internal.Char
-- Enable all linters for the declarations that follow.
set_option linter.all true
/--
Returns `true` when the code point of `c` is strictly below `0x80`, i.e. `c` is an ASCII character.
-/
@[inline]
def isAscii (c : Char) : Bool :=
  decide (c.toNat < 0x80)
/--
Returns `true` when the byte `c` is strictly below `0x80`, i.e. it encodes an ASCII character.
-/
@[inline]
def isAsciiByte (c : UInt8) : Bool :=
  decide (c < 0x80)
/--
Returns `true` when the byte `c` is one of the ASCII decimal digits `'0'` through `'9'`.
-/
@[inline]
def isDigitByte (c : UInt8) : Bool :=
  decide ('0'.toUInt8 ≤ c) && decide (c ≤ '9'.toUInt8)
/--
Returns `true` when the byte `c` is an ASCII letter, either upper case (`'A'`..`'Z'`) or
lower case (`'a'`..`'z'`).
-/
@[inline]
def isAlphaByte (c : UInt8) : Bool :=
  (decide ('A'.toUInt8 ≤ c) && decide (c ≤ 'Z'.toUInt8)) ||
    (decide ('a'.toUInt8 ≤ c) && decide (c ≤ 'z'.toUInt8))
/--
Character class for HTTP `token` characters:

    tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
          / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
          / DIGIT / ALPHA

Returns `true` when `c` is one of the listed symbols, a decimal digit, or a letter.
-/
@[inline]
def tchar (c : Char) : Bool :=
  (match c with
    | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '^' | '_' | '`' | '|' | '~' => true
    | _ => false) ||
  c.isDigit ||
  c.isAlpha
/--
vchar = %x21-7E
; Visible (printing) ASCII characters.

Returns `true` exactly when `c` lies between `'!'` and `'~'`, both bounds included.
The body is a conjunction of two `Char` comparisons that is coerced to `Bool`.
-/
@[inline]
def vchar (c : Char) : Bool :=
c ≥ '!' ∧ c ≤ '~'
/--
qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E
; ASCII-only variant (no obs-text).

Returns `true` for horizontal tab, space, `!`, and for characters in the inclusive ranges
`#`..`[` and `]`..`~`. The double quote and the backslash fall in the gaps between these
ranges and are therefore rejected.
-/
@[inline]
def qdtext (c : Char) : Bool :=
-- HTAB / SP / %x21
(c matches '\t' | ' ' | '!') ||
-- %x23-5B
('#' ≤ c ∧ c ≤ '[') ||
-- %x5D-7E
(']' ≤ c ∧ c ≤ '~')
/--
quoted-pair = "\\" ( HTAB / SP / VCHAR )
; ASCII-only variant (no obs-text).

This predicate checks only the parenthesised part of the rule: it returns `true` when `c` is
a horizontal tab, a space, or satisfies `vchar`. It does not look at the leading backslash.
-/
@[inline]
def quotedPairChar (c : Char) : Bool :=
(c matches '\t' | ' ') || vchar c
/--
quoted-string body character class:
( qdtext / quoted-pair payload ) in ASCII-only mode.

Returns `true` when `c` satisfies `qdtext` or `quotedPairChar`.
-/
@[inline]
def quotedStringChar (c : Char) : Bool :=
qdtext c || quotedPairChar c
/--
Any character accepted by `quotedStringChar` is strictly below `'\x80'`.
-/
theorem quotedStringChar_lt_0x80 : quotedStringChar c → c < '\x80' := by
-- Unfold the predicate and the two character classes it is built from.
simp [quotedStringChar, qdtext, quotedPairChar]
-- Case-split the goal and simplify each case with the listed lemmas.
split <;> simp only [true_or, Char.reduceLT, imp_self]
-- Discharge what remains using the `Char` order lemmas and the definition of `vchar`.
grind [→ Char.le_def.mp, Char.lt_def.mpr, vchar]
/--
Auxiliary form of `not_quotedStringChar_of_not_qdtext_not_dquote_backslash`, stated over
natural numbers below `128` so that it can be closed by `decide`.

For every `c < 128`: if `Char.ofNat c` is not `qdtext` and is neither the double quote nor the
backslash, then it is not a `quotedStringChar`.
-/
private theorem not_quotedStringChar_ofNat_aux :
    -- The two equalities are joined with `||`, matching the hypothesis of the public theorem
    -- `not_quotedStringChar_of_not_qdtext_not_dquote_backslash` that is derived from this one.
    ∀ c : Nat, c < 128 → ¬(qdtext (Char.ofNat c)) ∧ ¬((Char.ofNat c = '\"') || (Char.ofNat c = '\\')) →
      ¬(quotedStringChar (Char.ofNat c)) := by
  decide
/--
For a character `c` below `'\x80'`: if `c` is not `qdtext` and is neither the double quote nor
the backslash, then `c` is not a `quotedStringChar`.
-/
theorem not_quotedStringChar_of_not_qdtext_not_dquote_backslash :
∀ c : Char, c < '\x80' → ¬(qdtext (c)) ∧ ¬((c = '\"') || (c = '\\')) →
¬(quotedStringChar c) := by
intro c hlt hq
-- Apply the `Nat`-indexed auxiliary lemma at `c.toNat`; `Char.ofNat_toNat` rewrites
-- `Char.ofNat c.toNat` back to `c` in both the hypothesis and the conclusion.
simpa [Char.ofNat_toNat] using
(not_quotedStringChar_ofNat_aux
c.toNat hlt (by simpa [Char.ofNat_toNat] using hq))
/--
Character class for `field-vchar` in its ASCII-only form (no obs-text):

    field-vchar = VCHAR

Delegates to `vchar`.
-/
@[inline]
def fieldVchar (c : Char) : Bool :=
  vchar c
/--
Character class for `field-content` in its ASCII-only form (no obs-text):

    field-vchar / SP / HTAB

Returns `true` when `c` satisfies `fieldVchar`, or is a space or a horizontal tab.
-/
@[inline]
def fieldContent (c : Char) : Bool :=
  fieldVchar c ||
    (match c with
      | ' ' | '\t' => true
      | _ => false)
/--
Character class for comment text in its ASCII-only form (no obs-text):

    ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E

Returns `true` for horizontal tab, space, and the inclusive ranges `!`..`'`, `*`..`[` and
`]`..`~`. The parentheses and the backslash lie outside these ranges.
-/
@[inline]
def ctext (c : Char) : Bool :=
  (match c with
    | '\t' | ' ' => true
    | _ => false) ||
  decide ('!' ≤ c ∧ c ≤ '\'') ||
  decide ('*' ≤ c ∧ c ≤ '[') ||
  decide (']' ≤ c ∧ c ≤ '~')
/--
Character class for entity-tag characters in its ASCII-only form (no obs-text):

    etagc = "!" / %x23-7E

Returns `true` when `c` is `!` or lies in the inclusive range `#`..`~`. The double quote is
the only visible ASCII character rejected.
-/
@[inline]
def etagc (c : Char) : Bool :=
  decide (c = '!') || decide ('#' ≤ c ∧ c ≤ '~')
/--
Character class underlying optional whitespace:

    OWS = *( SP / HTAB )

Only the single-character test is implemented here: `true` for a space or a horizontal tab.
-/
@[inline]
def ows (c : Char) : Bool :=
  match c with
  | ' ' | '\t' => true
  | _ => false
/--
Character class for "bad" whitespace (`BWS = OWS`). Delegates to `ows`.
-/
@[inline]
def bws (c : Char) : Bool :=
  ows c
/--
Character class for required whitespace (`RWS = 1*( SP / HTAB )`).
The per-character test is the same as for `ows`, so this delegates to it.
-/
@[inline]
def rws (c : Char) : Bool :=
  ows c
/--
Character class for `obs-text = %x80-FF`. Because the argument is a `Char` rather than a byte,
every code point at or above `0x80` is accepted, including those above `0xFF`.
-/
@[inline]
def obsText (c : Char) : Bool :=
  decide (128 ≤ c.toNat)
/--
Character class for the status-line reason phrase in its ASCII-only form (no obs-text):

    HTAB / SP / VCHAR

Returns `true` when `c` is a horizontal tab, a space, or satisfies `vchar`.

Reference: https://httpwg.org/specs/rfc9110.html#reason.phrase
-/
@[inline]
def reasonPhraseChar (c : Char) : Bool :=
  (match c with
    | '\t' | ' ' => true
    | _ => false) ||
  vchar c
/--
Returns `true` when `c` is a hexadecimal digit: `0`-`9`, `a`-`f`, or `A`-`F`.
-/
@[inline]
def isHexDigit (c : Char) : Bool :=
  (match c with
    | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => true
    | _ => false) ||
  c.isDigit
/--
Returns `true` when the byte `c` is an ASCII hexadecimal digit: `0`-`9`, `a`-`f`, or `A`-`F`.
-/
@[inline]
def isHexDigitByte (c : UInt8) : Bool :=
  (decide ('0'.toUInt8 ≤ c) && decide (c ≤ '9'.toUInt8)) ||
  (decide ('a'.toUInt8 ≤ c) && decide (c ≤ 'f'.toUInt8)) ||
  (decide ('A'.toUInt8 ≤ c) && decide (c ≤ 'F'.toUInt8))
/--
Returns `true` when the byte `c` is an ASCII alphanumeric character: `0`-`9`, `a`-`z`, or `A`-`Z`.
-/
@[inline]
def isAlphaNum (c : UInt8) : Bool :=
  (decide ('0'.toUInt8 ≤ c) && decide (c ≤ '9'.toUInt8)) ||
  (decide ('a'.toUInt8 ≤ c) && decide (c ≤ 'z'.toUInt8)) ||
  (decide ('A'.toUInt8 ≤ c) && decide (c ≤ 'Z'.toUInt8))
/--
Returns `true` when `c` passes `isAscii` and is additionally a digit or a letter according to
`Char.isDigit` / `Char.isAlpha`.
-/
@[inline]
def isAsciiAlphaNumChar (c : Char) : Bool :=
  isAscii c && (c.isDigit || c.isAlpha)
/--
Character class for the characters of a URI scheme that follow its first character.
Accepts ASCII alphanumerics (via `isAsciiAlphaNumChar`) together with `+`, `-`, and `.`.
-/
@[inline]
def isValidSchemeChar (c : Char) : Bool :=
  isAsciiAlphaNumChar c ||
    (match c with
      | '+' | '-' | '.' => true
      | _ => false)
/--
Character class for domain names.
Accepts ASCII alphanumerics (via `isAsciiAlphaNumChar`) together with `-` and `.`.
-/
@[inline]
def isValidDomainNameChar (c : Char) : Bool :=
  isAsciiAlphaNumChar c ||
    (match c with
      | '-' | '.' => true
      | _ => false)
/--
RFC 3986 `unreserved` character class on bytes.
Accepts alphanumerics (via `isAlphaNum`) plus hyphen, period, underscore, and tilde.
-/
@[inline]
def isUnreserved (c : UInt8) : Bool :=
  isAlphaNum c ||
    (decide (c = '-'.toUInt8) ||
      decide (c = '.'.toUInt8) ||
      decide (c = '_'.toUInt8) ||
      decide (c = '~'.toUInt8))
/--
RFC 3986 `sub-delims` character class on bytes.
Accepts exactly these eleven characters: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`.
-/
@[inline]
def isSubDelims (c : UInt8) : Bool :=
  decide (c = '!'.toUInt8) ||
  decide (c = '$'.toUInt8) ||
  decide (c = '&'.toUInt8) ||
  decide (c = ('\'' : Char).toUInt8) ||
  decide (c = '('.toUInt8) ||
  decide (c = ')'.toUInt8) ||
  decide (c = '*'.toUInt8) ||
  decide (c = '+'.toUInt8) ||
  decide (c = ','.toUInt8) ||
  decide (c = ';'.toUInt8) ||
  decide (c = '='.toUInt8)
/--
RFC 3986 path character class on bytes:

    pchar = unreserved / pct-encoded / sub-delims / ":" / "@"

The `pct-encoded` alternative is not covered by this predicate; only the literal
(non-percent) characters are accepted here.
-/
@[inline]
def isPChar (c : UInt8) : Bool :=
  isUnreserved c ||
  isSubDelims c ||
  decide (c = ':'.toUInt8) ||
  decide (c = '@'.toUInt8)
/--
RFC 3986 query character class on bytes:

    query = *( pchar / "/" / "?" )

Returns `true` when `c` satisfies `isPChar` or is `/` or `?`.
-/
@[inline]
def isQueryChar (c : UInt8) : Bool :=
  isPChar c ||
  decide (c = '/'.toUInt8) ||
  decide (c = '?'.toUInt8)
/--
RFC 3986 fragment character class on bytes:

    fragment = *( pchar / "/" / "?" )

Returns `true` when `c` satisfies `isPChar` or is `/` or `?`.
-/
@[inline]
def isFragmentChar (c : UInt8) : Bool :=
  isPChar c ||
  decide (c = '/'.toUInt8) ||
  decide (c = '?'.toUInt8)
/--
RFC 3986 userinfo character class on bytes:

    userinfo = *( unreserved / sub-delims / ":" )

The `pct-encoded` alternative of the original grammar is deliberately left out of this
predicate.
-/
@[inline]
def isUserInfoChar (c : UInt8) : Bool :=
  isUnreserved c ||
  isSubDelims c ||
  decide (c = ':'.toUInt8)
/--
Character class for the data portions (keys and values) of a URI query.

Based on `query = *( pchar / "/" / "?" )` from RFC 3986, but rejects `&` and `=` so that those
two characters can serve as structural separators between and within key/value pairs.
-/
@[inline]
def isQueryDataChar (c : UInt8) : Bool :=
  isQueryChar c &&
  decide (c ≠ '&'.toUInt8) &&
  decide (c ≠ '='.toUInt8)
end Std.Http.Internal.Char