lean4-htt/tests/lean/run/string_gaps.lean
Markus Himmel fa5d08b7de
refactor: use String.Slice in String.take and variants (#11180)
This PR redefines `String.take` and variants to operate on
`String.Slice`. While previously functions returning a substring of the
input sometimes returned `String` and sometimes returned
`Substring.Raw`, they now uniformly return `String.Slice`.

This is a BREAKING change, because many functions now have a different
return type. So for example, if `s` is a string and `f` is a function
accepting a string, `f (s.drop 1)` will no longer compile because
`s.drop 1` is a `String.Slice`. To fix this, insert a call to `copy` to
restore the old behavior: `f (s.drop 1).copy`.

Of course, in many cases, there will be more efficient options. For
example, don't write `f <| s.drop 1 |>.copy |>.dropEnd 1 |>.copy`; write
`f <| s.drop 1 |>.dropEnd 1 |>.copy` instead. Similarly, write
`s.drop 1 == "Hello".toSlice` rather than `(s.drop 1).copy = "Hello"`.
2025-11-18 16:13:48 +00:00

166 lines
3.5 KiB
Text

import Lean.Parser.Extension
import Lean.Elab.Term
/-!
# Testing string gaps in string literals
String gaps are described in RFC #2838
-/
/-!
A string gap with no trailing space: the continuation line starts directly with `b`, so the
backslash and the line break contribute nothing to the resulting string.
-/
/-- info: "ab" -/
#guard_msgs in
#eval "a\
b"
/-!
A string gap with trailing whitespace before the `b` (the continuation line is indented).
That whitespace is consumed by the gap, so the result is the same as with no indentation.
-/
/-- info: "ab" -/
#guard_msgs in
#eval "a\
      b"
/-!
A string gap with a space before the gap (between `a` and the backslash), which is not consumed:
the space remains part of the resulting string.
-/
/-- info: "a b" -/
#guard_msgs in
#eval "a \
b"
/-!
Multiple string gaps in a row. The space before the first gap is kept, and the consecutive gaps
that follow contribute nothing to the resulting string.
-/
/-- info: "a b" -/
#guard_msgs in
#eval "a \
\
\
b"
/-!
Two tests from the RFC. The continuation lines use different amounts of indentation; all of it
is consumed by the gap, so both literals denote the same string.
-/
/-- info: "this is a string" -/
#guard_msgs in
#eval "this is \
       a string"
/-- info: "this is a string" -/
#guard_msgs in
#eval "this is \
         a string"
/-!
Two examples of how spaces are accounted for in string gaps. Spaces before the gap are kept, while
whitespace at the start of the continuation line is consumed. `\x20` is a way to force a leading space.
-/
/-- info: "there are three spaces between the brackets <   >" -/
#guard_msgs in
#eval "there are three spaces between the brackets <   \
        >"
/-- info: "there are three spaces between the brackets <   >" -/
#guard_msgs in
#eval "there are three spaces between the brackets <\
      \x20  >"
/-!
Using `\n` to terminate a string gap, which is a technique suggested by Mario for using string gaps to write
multiline literals in an indented context. The gap consumes the indentation up to the `\n` escape, and the
two spaces written after `\n` are preserved in the result.
-/
/-- info: "this is\n  a string with two space indent" -/
#guard_msgs in
#eval "this is\
       \n  a string with two space indent"
/-!
Similar tests but for interpolated strings. The continuation line has, in turn, no indentation,
a single space, and several spaces; in every case the whitespace is consumed by the gap.
-/
/-- info: "ab" -/
#guard_msgs in
#eval s!"a\
b"
/-- info: "ab" -/
#guard_msgs in
#eval s!"a\
 b"
/-- info: "ab" -/
#guard_msgs in
#eval s!"a\
    b"
/-!
The `{` terminates the string gap: the interpolated `{"b"}` directly follows the first gap, and a
second gap sits immediately before the closing quote.
-/
/-- info: "ab" -/
#guard_msgs in
#eval s!"a\
{"b"}\
"
open Lean
/-!
## Testing whitespace handling with specific line terminators
-/
/-!
Standard string gap, with LF: the parser is run directly on a term whose string literal has a
backslash followed by a line feed.
-/
/-- info: "ab" -/
#guard_msgs in
#eval show MetaM String from do
  let env ← getEnv
  let stx ← ofExcept (Parser.runParserCategory env `term "\"a\\\n b\"")
  -- Anything other than a string literal counts as a failure of the test.
  match stx.isStrLit? with
  | some lit => return lit
  | none => failure
/-!
Isolated CR, which is an error: a backslash followed by a carriage return alone is reported as an
invalid escape sequence.
-/
/-- error: <input>:1:3: invalid escape sequence -/
#guard_msgs (error, drop info) in
#eval show MetaM String from do
  let env ← getEnv
  let stx ← ofExcept (Parser.runParserCategory env `term "\"a\\\r b\"")
  -- Anything other than a string literal counts as a failure of the test.
  match stx.isStrLit? with
  | some lit => return lit
  | none => failure
/-!
Not a string gap since there's no end-of-line: a backslash followed by a plain space is reported
as an invalid escape sequence.
-/
/-- error: <input>:1:3: invalid escape sequence -/
#guard_msgs (error, drop info) in
#eval show MetaM String from do
  let env ← getEnv
  let stx ← ofExcept (Parser.runParserCategory env `term "\"a\\ b\"")
  -- Anything other than a string literal counts as a failure of the test.
  match stx.isStrLit? with
  | some lit => return lit
  | none => failure
/-!
## Scala-style stripMargin
This is a test that string gaps could be paired with a new string elaboration syntax
for indented multiline string literals.
-/
/--
Strips a `|` margin from every line of `s` after the first.

The string is split at `'\n'` and each piece has its leading whitespace removed. The first piece
is kept as is; every later piece must start with `|`, which is dropped. Returns `none` if some
later piece does not start with `|`.
-/
def String.dedent (s : String) : Option String :=
  let lines := s.split (· == '\n') |>.map String.trimLeft
  match lines with
  | [] => some ""
  | [only] => some only
  | first :: rest =>
    if rest.all (·.startsWith "|") then
      -- Drop the one-character margin marker from each continuation line.
      let stripped := rest.map fun line => (line.drop 1).copy
      some (first ++ "\n" ++ String.intercalate "\n" stripped)
    else
      none
/--
`d!"…"` elaborates to the string literal obtained by applying `String.dedent` to its argument.
The syntax is rejected as ill-formed if the argument is not a string literal or if
`String.dedent` returns `none`.
-/
elab "d!" lit:str : term => do
  let some raw := lit.raw.isStrLit?
    | Lean.Elab.throwIllFormedSyntax
  match String.dedent raw with
  | some dedented => return Lean.mkStrLit dedented
  | none => Lean.Elab.throwIllFormedSyntax
/-!
The string gap on the first line joins `this is ` with `line 1`; the remaining lines start with
the `|` margin that `String.dedent` strips.
-/
/-- info: "this is line 1\n line 2, indented\nline 3" -/
#guard_msgs in
#eval d!"this is \
line 1
| line 2, indented
|line 3"