fix: corrects the handling of datetime size for certain specifiers during parsing (#7571)

This PR fixes #7478 by modifying `number` specifiers from `atLeast size`
to `flexible size` for parsing. This change allows:
- 1 repetition to accept 1 or more characters
- More than 1 repetition to require exactly that many characters

For `year` specifiers, the number of repetitions is always strictly
enforced, requiring exactly the specified amount.

---------

Co-authored-by: Markus Himmel <markus@himmel-villmar.de>
This commit is contained in:
Sofia Rodrigues 2025-03-26 13:11:13 -03:00 committed by GitHub
parent 74b1c29a48
commit 5ad6edc8d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 160 additions and 27 deletions

View file

@ -129,16 +129,22 @@ Format strings are used to convert between `String` representations and date/tim
The table below shows the available format specifiers. Some specifiers can be repeated to control truncation or offsets.
When a character is repeated `n` times, it usually truncates the value to `n` characters.
The `number` type format specifiers, such as `h` and `K`, are parsed based on the number of repetitions.
If the repetition count is one, the format allows values ranging from 1 up to the maximum capacity of
the respective data type.
The supported formats include:
- `G`: Represents the era, such as AD (Anno Domini) or BC (Before Christ).
- `G`, `GG`, `GGG` (short): Displays the era in a short format (e.g., "AD").
- `GGGG` (full): Displays the era in a full format (e.g., "Anno Domini").
- `GGGGG` (narrow): Displays the era in a narrow format (e.g., "A").
- `y`: Represents the year of the era.
- `y`: Represents the year in its full form, without a fixed length. It can handle years of any size, (e.g., "1", "2025", or "12345678").
- `yy`: Displays the year in a two-digit format, showing the last two digits (e.g., "04" for 2004).
- `yyyy`: Displays the year in a four-digit format (e.g., "2004").
- `yyyy+`: Extended format for years with more than four digits.
- `u`: Represents the year.
- `u`: Represents the year in its full form, without a fixed length. It can handle years of any size, (e.g., "1", "2025", or "12345678").
- `uu`: Two-digit year format, showing the last two digits (e.g., "04" for 2004).
- `uuuu`: Displays the year in a four-digit format (e.g., "2004" or "-1000").
- `uuuu+`: Extended format for handling years with more than four digits (e.g., "12345" or "-12345"). Useful for historical dates far into the past or future!

View file

@ -98,6 +98,8 @@ end Fraction
`Year` represents different year formatting styles based on the number of pattern letters.
-/
inductive Year
/-- Any size (e.g., "19000000000000") -/
| any
/-- Two-digit year format (e.g., "23" for 2023) -/
| twoDigit
/-- Four-digit year format (e.g., "2023") -/
@ -112,10 +114,12 @@ namespace Year
`classify` classifies the number of pattern letters into a `Year` format.
-/
def classify (num : Nat) : Option Year :=
if num = 2 then
some (.twoDigit)
if num = 1 then
some .any
else if num = 2 then
some .twoDigit
else if num = 4 then
some (.fourDigit)
some .fourDigit
else if num > 4 num = 3 then
some (.extended num)
else
@ -787,11 +791,13 @@ private def formatWith (modifier : Modifier) (data: TypeFormat modifier) : Strin
let info := data.toInt
let info := if info ≤ 0 then -info + 1 else info
match format with
| .any => pad 0 (data.toInt)
| .twoDigit => pad 2 (info % 100)
| .fourDigit => pad 4 info
| .extended n => pad n info
| .u format =>
match format with
| .any => pad 0 (data.toInt)
| .twoDigit => pad 2 (data.toInt % 100)
| .fourDigit => pad 4 data.toInt
| .extended n => pad n data.toInt
@ -1076,6 +1082,9 @@ private def parseAtLeastNum (size : Nat) : Parser Nat :=
let end_ ← manyChars (satisfy Char.isDigit)
pure (start ++ end_)
private def parseFlexibleNum (size : Nat) : Parser Nat :=
if size = 1 then parseAtLeastNum 1 else parseNum size
private def parseFractionNum (size : Nat) (pad : Nat) : Parser Nat :=
String.toNat! <$> rightPad pad '0' <$> exactlyChars (satisfy Char.isDigit) size
@ -1121,30 +1130,32 @@ private def parseWith : (mod : Modifier) → Parser (TypeFormat mod)
| .narrow => parseEraNarrow
| .y format =>
match format with
| .twoDigit => (2000 + ·) <$> (Int.ofNat <$> parseNum 2)
| .any => Int.ofNat <$> parseAtLeastNum 1
| .twoDigit => (2000 + ·) <$> Int.ofNat <$> parseNum 2
| .fourDigit => Int.ofNat <$> parseNum 4
| .extended n => Int.ofNat <$> parseAtLeastNum n
| .extended n => Int.ofNat <$> parseNum n
| .u format =>
match format with
| .twoDigit => (2000 + ·) <$> (parseSigned <| parseNum 2)
| .fourDigit => parseSigned <| parseAtLeastNum 4
| .extended n => parseSigned <| parseAtLeastNum n
| .D format => Sigma.mk true <$> parseNatToBounded (parseAtLeastNum format.padding)
| .any => parseSigned <| parseAtLeastNum 1
| .twoDigit => (2000 + ·) <$> Int.ofNat <$> parseNum 2
| .fourDigit => parseSigned <| parseNum 4
| .extended n => parseSigned <| parseNum n
| .D format => Sigma.mk true <$> parseNatToBounded (parseFlexibleNum format.padding)
| .MorL format =>
match format with
| .inl format => parseNatToBounded (parseAtLeastNum format.padding)
| .inl format => parseNatToBounded (parseFlexibleNum format.padding)
| .inr .short => parseMonthShort
| .inr .full => parseMonthLong
| .inr .narrow => parseMonthNarrow
| .d format => parseNatToBounded (parseAtLeastNum format.padding)
| .d format => parseNatToBounded (parseFlexibleNum format.padding)
| .Qorq format =>
match format with
| .inl format => parseNatToBounded (parseAtLeastNum format.padding)
| .inl format => parseNatToBounded (parseFlexibleNum format.padding)
| .inr .short => parseQuarterShort
| .inr .full => parseQuarterLong
| .inr .narrow => parseQuarterNumber
| .w format => parseNatToBounded (parseAtLeastNum format.padding)
| .W format => parseNatToBounded (parseAtLeastNum format.padding)
| .w format => parseNatToBounded (parseFlexibleNum format.padding)
| .W format => parseNatToBounded (parseFlexibleNum format.padding)
| .E format =>
match format with
| .short => parseWeekdayShort
@ -1152,29 +1163,29 @@ private def parseWith : (mod : Modifier) → Parser (TypeFormat mod)
| .narrow => parseWeekdayNarrow
| .eorc format =>
match format with
| .inl format => Weekday.ofOrdinal <$> parseNatToBounded (parseAtLeastNum format.padding)
| .inl format => Weekday.ofOrdinal <$> parseNatToBounded (parseFlexibleNum format.padding)
| .inr .short => parseWeekdayShort
| .inr .full => parseWeekdayLong
| .inr .narrow => parseWeekdayNarrow
| .F format => parseNatToBounded (parseAtLeastNum format.padding)
| .F format => parseNatToBounded (parseFlexibleNum format.padding)
| .a format =>
match format with
| .short => parseMarkerShort
| .full => parseMarkerLong
| .narrow => parseMarkerNarrow
| .h format => parseNatToBounded (parseAtLeastNum format.padding)
| .K format => parseNatToBounded (parseAtLeastNum format.padding)
| .k format => parseNatToBounded (parseAtLeastNum format.padding)
| .H format => parseNatToBounded (parseAtLeastNum format.padding)
| .m format => parseNatToBounded (parseAtLeastNum format.padding)
| .s format => parseNatToBounded (parseAtLeastNum format.padding)
| .h format => parseNatToBounded (parseFlexibleNum format.padding)
| .K format => parseNatToBounded (parseFlexibleNum format.padding)
| .k format => parseNatToBounded (parseFlexibleNum format.padding)
| .H format => parseNatToBounded (parseFlexibleNum format.padding)
| .m format => parseNatToBounded (parseFlexibleNum format.padding)
| .s format => parseNatToBounded (parseFlexibleNum format.padding)
| .S format =>
match format with
| .nano => parseNatToBounded (parseAtLeastNum 9)
| .nano => parseNatToBounded (parseFlexibleNum 9)
| .truncated n => parseNatToBounded (parseFractionNum n 9)
| .A format => Millisecond.Offset.ofNat <$> (parseAtLeastNum format.padding)
| .n format => parseNatToBounded (parseAtLeastNum format.padding)
| .N format => Nanosecond.Offset.ofNat <$> (parseAtLeastNum format.padding)
| .A format => Millisecond.Offset.ofNat <$> (parseFlexibleNum format.padding)
| .n format => parseNatToBounded (parseFlexibleNum format.padding)
| .N format => Nanosecond.Offset.ofNat <$> (parseFlexibleNum format.padding)
| .V => parseIdentifier
| .z format =>
match format with

View file

@ -29,6 +29,7 @@ private def convertFraction : Fraction → MacroM (TSyntax `term)
| .truncated digits => `(Std.Time.Fraction.truncated $(quote digits))
private def convertYear : Year → MacroM (TSyntax `term)
| .any => `(Std.Time.Year.any)
| .twoDigit => `(Std.Time.Year.twoDigit)
| .fourDigit => `(Std.Time.Year.fourDigit)
| .extended n => `(Std.Time.Year.extended $(quote n))

View file

@ -27,6 +27,7 @@ private def convertFraction : Fraction → MacroM (TSyntax `term)
| .truncated digits => `(Std.Time.Fraction.truncated $(quote digits))
private def convertYear : Year → MacroM (TSyntax `term)
| .any => `(Std.Time.Year.any)
| .twoDigit => `(Std.Time.Year.twoDigit)
| .fourDigit => `(Std.Time.Year.fourDigit)
| .extended n => `(Std.Time.Year.extended $(quote n))

View file

@ -12,6 +12,7 @@ def Time12Hour : GenericFormat .any := datespec("hh:mm:ss aa")
def FullDayTimeZone : GenericFormat .any := datespec("EEEE, MMMM dd, uuuu HH:mm:ss ZZZ")
def CustomDayTime : GenericFormat .any := datespec("EEE dd MMM uuuu HH:mm")
def EraDate : GenericFormat .any := datespec("MM D, uuuu G")
def DateSmall : GenericFormat .any := datespec("uu-MM-dd")
-- Dates
@ -795,13 +796,20 @@ info: 1
let t : ZonedDateTime := .ofPlainDateTime datetime("2018-12-31T12:00:00") (TimeZone.ZoneRules.ofTimeZone TimeZone.UTC)
IO.println s!"{t.format "w"}"
/--
info: Except.error "offset 0: condition not satisfied"
-/
#guard_msgs in
#eval DateSmall.parse "-23-12-12"
/-
Truncation Test
-/
/--
info: ("19343232432-01-04T01:04:03.000000000",
Except.ok (datetime("19343232432-01-04T01:04:03.000000000")),
Except.error "offset 4: expected: -",
datetime("1932-01-02T05:04:03.000000000"))
-/
#guard_msgs in
@ -810,3 +818,109 @@ info: ("19343232432-01-04T01:04:03.000000000",
let s := r.toLeanDateTimeString
let r := PlainDateTime.parse s
(s, r, datetime("1932-01-02T05:04:03.000000000"))
def tuple2Mk (a : f) (b : g) := some (a, b)
def tuple3Mk (a : f) (b : g) (c : h) := some (a, b, c)
def tuple4Mk (a : f) (b : g) (c : h) (d : i) := some (a, b, c, d)
def tuple5Mk (a : f) (b : g) (c : h) (d : i) (e : j) := some (a, b, c, d, e)
def tuple6Mk (a : f) (b : g) (c : h) (d : i) (e : j) (k : z) := some (a, b, c, d, e, k)
/-
Parsing Length Tests
-/
def uFormat : GenericFormat .any := datespec("u uu uuuu uuuuu")
#eval do assert! (uFormat.parseBuilder tuple4Mk "1 11 1211 12311" |>.isOk)
#eval do assert! (uFormat.parseBuilder tuple4Mk "12 11 1211 12311" |>.isOk)
#eval do assert! (uFormat.parseBuilder tuple4Mk "123443 11 1211 12311" |>.isOk)
#eval do assert! (uFormat.parseBuilder tuple4Mk "-1 11 1211 12311" |>.isOk)
#eval do assert! (uFormat.parseBuilder tuple4Mk "1 11 -1211 12311" |>.isOk)
#eval do assert! (uFormat.parseBuilder tuple4Mk "1 11 1211 -12311" |>.isOk)
#eval do assert! (not <| uFormat.parseBuilder tuple4Mk "1 -11 1211 12311" |>.isOk)
#eval do assert! (not <| uFormat.parseBuilder tuple4Mk "11 1211 12134" |>.isOk)
#eval do assert! (not <| uFormat.parseBuilder tuple4Mk "1 1 12 1234" |>.isOk)
#eval do assert! (not <| uFormat.parseBuilder tuple4Mk "1 11 1213 111123" |>.isOk)
#eval do assert! (not <| uFormat.parseBuilder tuple4Mk "1 367 1211 12311" |>.isOk)
def yFormat : GenericFormat .any := datespec("y yy yyyy yyyyy")
#eval do assert! (yFormat.parseBuilder tuple4Mk "1 11 1211 12311" |>.isOk)
#eval do assert! (yFormat.parseBuilder tuple4Mk "12 11 1211 12311" |>.isOk)
#eval do assert! (yFormat.parseBuilder tuple4Mk "123443 11 1211 12311" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "-1 11 1211 12311" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 -11 1211 12311" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 11 -1211 12311" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 11 1211 -12311" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "11 1211 12134" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 1 12 1234" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 11 1213 111123" |>.isOk)
#eval do assert! (not <| yFormat.parseBuilder tuple4Mk "1 367 1211 12311" |>.isOk)
def dFormat : GenericFormat .any := datespec("D DD DDD")
#eval do assert! (dFormat.parseBuilder tuple3Mk "1 12 123" |>.isOk)
#eval do assert! (dFormat.parseBuilder tuple3Mk "323 12 123" |>.isOk)
#eval do assert! (not <| dFormat.parseBuilder tuple3Mk "1 12 1234" |>.isOk)
#eval do assert! (not <| dFormat.parseBuilder tuple3Mk "1 123 123" |>.isOk)
#eval do assert! (not <| dFormat.parseBuilder tuple3Mk "367 12 123" |>.isOk)
def dddFormat : GenericFormat .any := datespec("d dd ddd dddd ddddd")
#eval do assert! (dddFormat.parseBuilder tuple5Mk "1 12 031 0031 00031" |>.isOk)
#eval do assert! (dddFormat.parseBuilder tuple5Mk "000031 12 031 0031 00031" |>.isOk)
#eval do assert! (not <| dddFormat.parseBuilder tuple5Mk "1 12 0031 00031" |>.isOk)
#eval do assert! (not <| dddFormat.parseBuilder tuple5Mk "1 031 0031 000031" |>.isOk)
def wFormat : GenericFormat .any := datespec("w ww www wwww")
#eval do assert! (wFormat.parseBuilder tuple4Mk "1 01 031 0031" |>.isOk)
#eval do assert! (wFormat.parseBuilder tuple4Mk "2 01 031 0031" |>.isOk)
#eval do assert! (not <| wFormat.parseBuilder tuple4Mk "2 01 031 00310" |>.isOk)
#eval do assert! (not <| wFormat.parseBuilder tuple4Mk "2 01 031 031" |>.isOk)
def qFormat : GenericFormat .any := datespec("q qq")
#eval do assert! (qFormat.parseBuilder tuple2Mk "1 02" |>.isOk)
#eval do assert! (qFormat.parseBuilder tuple2Mk "3 03" |>.isOk)
#eval do assert! (not <| qFormat.parseBuilder tuple2Mk "12 32" |>.isOk)
#eval do assert! (not <| qFormat.parseBuilder tuple2Mk "000001 003" |>.isOk)
def WFormat : GenericFormat .any := datespec("W WW")
#eval do assert! (WFormat.parseBuilder tuple2Mk "1 06" |>.isOk)
#eval do assert! (WFormat.parseBuilder tuple2Mk "3 03" |>.isOk)
#eval do assert! (not <| WFormat.parseBuilder tuple2Mk "12 32" |>.isOk)
#eval do assert! (not <| WFormat.parseBuilder tuple2Mk "000001 003" |>.isOk)
def eFormat : GenericFormat .any := datespec("e ee")
#eval do assert! (eFormat.parseBuilder tuple2Mk "1 07" |>.isOk)
#eval do assert! (eFormat.parseBuilder tuple2Mk "3 03" |>.isOk)
#eval do assert! (not <| eFormat.parseBuilder tuple2Mk "12 32" |>.isOk)
#eval do assert! (not <| eFormat.parseBuilder tuple2Mk "000001 003" |>.isOk)
def FFormat : GenericFormat .any := datespec("F FF")
#eval do assert! (FFormat.parseBuilder tuple2Mk "1 04" |>.isOk)
#eval do assert! (FFormat.parseBuilder tuple2Mk "3 03" |>.isOk)
#eval do assert! (not <| FFormat.parseBuilder tuple2Mk "12 32" |>.isOk)
#eval do assert! (not <| FFormat.parseBuilder tuple2Mk "000001 003" |>.isOk)
def hFormat : GenericFormat .any := datespec("h hh")
#eval do assert! (hFormat.parseBuilder tuple2Mk "1 09" |>.isOk)
#eval do assert! (hFormat.parseBuilder tuple2Mk "12 12" |>.isOk)
#eval do assert! (not <| hFormat.parseBuilder tuple2Mk "12 32" |>.isOk)
#eval do assert! (not <| hFormat.parseBuilder tuple2Mk "000001 003" |>.isOk)