diff --git a/src/Init/Data/String/Slice.lean b/src/Init/Data/String/Slice.lean index 3f7f23e4b3..e3a5f80870 100644 --- a/src/Init/Data/String/Slice.lean +++ b/src/Init/Data/String/Slice.lean @@ -1249,7 +1249,8 @@ def foldr {α : Type u} (f : Char → α → α) (init : α) (s : Slice) : α := Checks whether the slice can be interpreted as the decimal representation of a natural number. A slice can be interpreted as a decimal natural number if it is not empty and all the characters in -it are digits. +it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear +at the start, at the end, or consecutively. Use {name (scope := "Init.Data.String.Slice")}`toNat?` or {name (scope := "Init.Data.String.Slice")}`toNat!` to convert such a slice to a natural number. @@ -1260,21 +1261,39 @@ Examples: * {lean}`"5".toSlice.isNat = true` * {lean}`"05".toSlice.isNat = true` * {lean}`"587".toSlice.isNat = true` + * {lean}`"1_000".toSlice.isNat = true` + * {lean}`"100_000_000".toSlice.isNat = true` * {lean}`"-587".toSlice.isNat = false` * {lean}`" 5".toSlice.isNat = false` * {lean}`"2+3".toSlice.isNat = false` * {lean}`"0xff".toSlice.isNat = false` + * {lean}`"_123".toSlice.isNat = false` + * {lean}`"123_".toSlice.isNat = false` + * {lean}`"12__34".toSlice.isNat = false` -/ @[inline] def isNat (s : Slice) : Bool := - !s.isEmpty && s.all Char.isDigit + if s.isEmpty then + false + else + -- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid + let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c => + let isDigit := c.isDigit + let isUnderscore := c = '_' + let newValid := valid && (isDigit || isUnderscore) && + !(isFirst && isUnderscore) && -- Cannot start with underscore + !(lastWasUnderscore && isUnderscore) -- No consecutive underscores + (false, isUnderscore, isDigit, newValid)) + (true, false, false, true) + -- Must be valid and last character must have been a digit (not underscore) + result.2.2.2 && result.2.2.1 /-- Interprets a slice as the decimal representation of a natural number, returning it. Returns {name}`none` if the slice does not contain a decimal natural number. A slice can be interpreted as a decimal natural number if it is not empty and all the characters in -it are digits. +it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing. Use {name}`isNat` to check whether {name}`toNat?` would return {name}`some`. {name (scope := "Init.Data.String.Slice")}`toNat!` is an alternative that panics instead of @@ -1285,6 +1304,8 @@ Examples: * {lean}`"0".toSlice.toNat? = some 0` * {lean}`"5".toSlice.toNat? = some 5` * {lean}`"587".toSlice.toNat? = some 587` + * {lean}`"1_000".toSlice.toNat? = some 1000` + * {lean}`"100_000_000".toSlice.toNat? = some 100000000` * {lean}`"-587".toSlice.toNat? = none` * {lean}`" 5".toSlice.toNat? = none` * {lean}`"2+3".toSlice.toNat? = none` @@ -1292,7 +1313,7 @@ Examples: -/ def toNat? (s : Slice) : Option Nat := if s.isNat then - some <| s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0 + some <| s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0 else none @@ -1301,7 +1322,7 @@ Interprets a slice as the decimal representation of a natural number, returning slice does not contain a decimal natural number. A slice can be interpreted as a decimal natural number if it is not empty and all the characters in -it are digits. +it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing. Use {name}`isNat` to check whether {name}`toNat!` would return a value. {name}`toNat?` is a safer alternative that returns {name}`none` instead of panicking when the string is not a natural number. @@ -1310,10 +1331,11 @@ Examples: * {lean}`"0".toSlice.toNat! = 0` * {lean}`"5".toSlice.toNat! = 5` * {lean}`"587".toSlice.toNat! = 587` + * {lean}`"1_000".toSlice.toNat! = 1000` -/ def toNat! (s : Slice) : Nat := if s.isNat then - s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0 + s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0 else panic! "Nat expected" diff --git a/src/Init/Data/String/Substring.lean b/src/Init/Data/String/Substring.lean index d9007f7013..0f7df7c0b2 100644 --- a/src/Init/Data/String/Substring.lean +++ b/src/Init/Data/String/Substring.lean @@ -403,25 +403,39 @@ Examples: Checks whether the substring can be interpreted as the decimal representation of a natural number. A substring can be interpreted as a decimal natural number if it is not empty and all the characters -in it are digits. +in it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear +at the start, at the end, or consecutively. Use `Substring.toNat?` to convert such a substring to a natural number. -/ @[inline] def isNat (s : Substring.Raw) : Bool := - !s.isEmpty && s.all fun c => c.isDigit + if s.isEmpty then + false + else + -- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid + let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c => + let isDigit := c.isDigit + let isUnderscore := c = '_' + let newValid := valid && (isDigit || isUnderscore) && + !(isFirst && isUnderscore) && -- Cannot start with underscore + !(lastWasUnderscore && isUnderscore) -- No consecutive underscores + (false, isUnderscore, isDigit, newValid)) + (true, false, false, true) + -- Must be valid and last character must have been a digit (not underscore) + result.2.2.2 && result.2.2.1 /-- Checks whether the substring can be interpreted as the decimal representation of a natural number, returning the number if it can. A substring can be interpreted as a decimal natural number if it is not empty and all the characters -in it are digits. +in it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing. Use `Substring.isNat` to check whether the substring is such a substring. -/ def toNat? (s : Substring.Raw) : Option Nat := if s.isNat then - some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0 + some <| s.foldl (fun n c => if c = '_' then n else n*10 + (c.toNat - '0'.toNat)) 0 else none diff --git a/tests/lean/run/string_toNat_underscores.lean b/tests/lean/run/string_toNat_underscores.lean new file mode 100644 index 0000000000..23d6e6d867 --- /dev/null +++ b/tests/lean/run/string_toNat_underscores.lean @@ -0,0 +1,88 @@ +/-! +Tests for `String.toNat?` and `String.toInt?` with underscore support. + +This ensures that numeric parsing functions accept the same underscore digit separators +that are allowed in Lean numeric literals, for consistency. + +See: https://github.com/leanprover/lean4/issues/11538 +-/ + +-- Basic underscore tests for String.Slice.isNat +#guard "1_000".toSlice.isNat = true +#guard "100_000".toSlice.isNat = true +#guard "1_000_000".toSlice.isNat = true +#guard "100_000_000".toSlice.isNat = true + +-- Edge cases for isNat +#guard "_123".toSlice.isNat = false -- Cannot start with underscore +#guard "123_".toSlice.isNat = false -- Cannot end with underscore +#guard "12__34".toSlice.isNat = false -- Cannot have consecutive underscores +#guard "1_2_3".toSlice.isNat = true -- Single underscores are fine + +-- toNat? basic tests +#guard "1_000".toSlice.toNat? = some 1000 +#guard "100_000".toSlice.toNat? = some 100000 +#guard "1_000_000".toSlice.toNat? = some 1000000 +#guard "100_000_000".toSlice.toNat? = some 100000000 + +-- More complex patterns +#guard "1_2_3_4_5".toSlice.toNat? = some 12345 +#guard "9_99".toSlice.toNat? = some 999 +#guard "5_0_0_0".toSlice.toNat? = some 5000 + +-- Edge cases that should fail +#guard "_123".toSlice.toNat? = none +#guard "123_".toSlice.toNat? = none +#guard "12__34".toSlice.toNat? = none +#guard "12_3_".toSlice.toNat? = none +#guard "_".toSlice.toNat? = none + +-- Verify numeric values are correct +#guard "1_234".toSlice.toNat? = "1234".toSlice.toNat? +#guard "987_654_321".toSlice.toNat? = "987654321".toSlice.toNat? + +-- Tests for String.toNat? (not just Slice) +#guard "1_000".toNat? = some 1000 +#guard "100_000".toNat? = some 100000 +#guard "1_000_000".toNat? = some 1000000 +#guard "_123".toNat? = none +#guard "123_".toNat? = none + +-- Tests for String.Slice.isInt +#guard "-1_000".toSlice.isInt = true +#guard "-100_000".toSlice.isInt = true +#guard "1_000".toSlice.isInt = true -- Positive numbers are also ints + +-- Edge cases for isInt +#guard "-_123".toSlice.isInt = false -- Cannot have underscore right after minus +#guard "-123_".toSlice.isInt = false -- Cannot end with underscore +#guard "-12__34".toSlice.isInt = false -- Cannot have consecutive underscores + +-- toInt? basic tests +#guard "-1_000".toSlice.toInt? = some (-1000) +#guard "-100_000".toSlice.toInt? = some (-100000) +#guard "-1_000_000".toSlice.toInt? = some (-1000000) +#guard "1_000".toSlice.toInt? = some 1000 +#guard "100_000".toSlice.toInt? = some 100000 + +-- Edge cases for toInt? +#guard "-_123".toSlice.toInt? = none +#guard "-123_".toSlice.toInt? = none +#guard "-12__34".toSlice.toInt? = none + +-- Verify numeric values are correct +#guard "-1_234".toSlice.toInt? = "-1234".toSlice.toInt? +#guard "987_654_321".toSlice.toInt? = "987654321".toSlice.toInt? + +-- Tests for String.toInt? (not just Slice) +#guard "-1_000".toInt? = some (-1000) +#guard "1_000".toInt? = some 1000 +#guard "-_123".toInt? = none +#guard "-123_".toInt? = none + +-- Compatibility with existing behavior (no underscores) +#guard "0".toNat? = some 0 +#guard "123".toNat? = some 123 +#guard "".toNat? = none +#guard "-456".toInt? = some (-456) +#guard "789".toInt? = some 789