fix: apply newlines before and after comments when formatting syntax (#8626)

This PR closes #3791, making sure that the Syntax formatter inserts
whitespace before and after comments in the leading and trailing text of
Syntax to avoid having comments comment out any following syntax, and to
avoid comments' lexical syntax from being interpreted as being part of
another syntax. If the text contains newlines before or after any
comments, they are formatted as hard newlines rather than soft newlines.
For example, `--` comments will have a hard newline after them. Note:
metaprograms generating Syntax with comments should be sure to include
newlines at the ends of `--` comments.
This commit is contained in:
Kyle Miller 2025-06-26 12:23:35 -07:00 committed by GitHub
parent 7ed716f904
commit b56ad5a7d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 235 additions and 42 deletions

View file

@ -31,7 +31,7 @@ structure Context where
structure State where
stxTrav : Syntax.Traverser
/-- Textual content of `stack` up to the first whitespace (not enclosed in an escaped ident). We assume that the textual
content of `stack` is modified only by `pushText` and `pushLine`, so `leadWord` is adjusted there accordingly. -/
content of `stack` is modified only by `push` and `pushWhitespace`, so `leadWord` is adjusted there accordingly. -/
leadWord : String := ""
/-- When the `leadWord` is nonempty, whether it is an identifier. Identifiers get space inserted between them. -/
leadWordIdent : Bool := false
@ -351,18 +351,21 @@ def parseToken (s : String) : FormatterM ParserState :=
} ((← read).table) (Parser.mkParserState s)
def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit := do
match info with
| SourceInfo.original _ _ ss _ =>
-- preserve non-whitespace content (i.e. comments)
if let SourceInfo.original _ _ ss _ := info then
-- preserve non-whitespace content (comments)
let ss' := ss.trim
if !ss'.isEmpty then
let ws := { ss with startPos := ss'.stopPos }
if ws.contains '\n' then
push s!"\n{ss'}"
unless ss'.isEmpty do
let preNL := Substring.contains { ss with stopPos := ss'.startPos } '\n'
let postNL := Substring.contains { ss with startPos := ss'.stopPos } '\n'
if postNL then
pushWhitespace "\n"
else if !(← get).leadWord.isEmpty then
pushLine
push ss'.toString
if preNL then
pushWhitespace "\n"
else
push s!" {ss'}"
modify fun st => { st with leadWord := "", leadWordIdent := false }
| _ => pure ()
pushLine
let st ← get
-- If there is no space between `tk` and the next word, see if we should insert a discretionary space.
@ -400,21 +403,27 @@ def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit
push tk -- preserve special whitespace for tokens like ":=\n"
modify fun st => { st with leadWord := if tk.trimLeft == tk then tk else "", leadWordIdent := ident }
match info with
| SourceInfo.original ss _ _ _ =>
-- preserve non-whitespace content (i.e. comments)
if let SourceInfo.original ss _ _ _ := info then
-- preserve non-whitespace content (comments)
let ss' := ss.trim
if !ss'.isEmpty then
let ws := { ss with startPos := ss'.stopPos }
if ws.contains '\n' then do
-- Indentation is automatically increased when entering a category, but comments should be aligned
-- with the actual token, so dedent
indent (push s!"{ss'}\n") (some ((0:Int) - Std.Format.getIndent (← getOptions)))
else
pushLine
push ss'.toString
modify fun st => { st with leadWord := "" }
| _ => pure ()
unless ss'.isEmpty do
let preNL := Substring.contains { ss with stopPos := ss'.startPos } '\n'
let postNL := Substring.contains { ss with startPos := ss'.stopPos } '\n'
-- Indentation is automatically increased when entering a category, but comments should be aligned
-- with the actual token, so dedent
indent (indent := some (-Std.Format.getIndent (← getOptions))) do
if postNL then
pushWhitespace "\n"
else
pushLine
pushWhitespace ss'.toString
if preNL then
pushWhitespace "\n"
else
-- It is conceivable that the start of comment syntax could be misinterpreted as part of a token,
-- so add the beginning of it as the leadWord.
let ctk := ss' |>.takeWhile (!·.isWhitespace) |>.toString
modify fun st => { st with leadWord := ctk }
@[combinator_formatter symbolNoAntiquot]
def symbolNoAntiquot.formatter (sym : String) : Formatter := do

View file

@ -7,21 +7,28 @@ open Lean.Elab.Term
open Std.Format open Std
unsafe def main (args : List String) : IO Unit := do
let (debug, f) : Bool × String := match args with
| [f, "-d"] => (true, f)
| [f] => (false, f)
| _ => panic! "usage: file [-d]"
let env ← mkEmptyEnvironment
let stx ← Lean.Parser.testParseFile env args.head!
let (f, _) ← (tryFinally (PrettyPrinter.ppModule stx) printTraces).toIO { options := Options.empty.setBool `trace.PrettyPrinter.format debug, fileName := "", fileMap := default } { env := env }
IO.print f
let stx' ← Lean.Parser.testParseModule env args.head! (toString f)
if stx' != stx then
let stx := stx.raw.getArg 1
let stx' := stx'.raw.getArg 1
stx.getArgs.size.forM fun i _ => do
if stx.getArg i != stx'.getArg i then
throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}"
let (debug, fn) : Bool × String := match args with
| [f, "-d"] => (true, f)
| [f] => (false, f)
| _ => panic! "usage: file [-d]"
let env ← mkEmptyEnvironment
let stx ← Lean.Parser.testParseFile env fn
let act : CoreM Format := do
withOptions (fun opts =>
opts
-- Name sanitization clears inline comments attached to identifiers.
|>.set `pp.sanitizeNames false
|>.set `trace.PrettyPrinter.format debug) do
tryFinally (PrettyPrinter.ppModule stx) printTraces
let (f, _) ← act.toIO { fileName := "", fileMap := default } { env := env }
IO.print f
let stx' ← Lean.Parser.testParseModule env fn (toString f)
if stx' != stx then
let stx := stx.raw.getArg 1
let stx' := stx'.raw.getArg 1
stx.getArgs.size.forM fun i _ => do
if stx.getArg i != stx'.getArg i then
throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}"
-- abbreviated Prelude.lean, which can be parsed without elaboration
#eval main ["Reformat/Input.lean"]

View file

@ -11,6 +11,7 @@ universe u v w
def id {α : Sort u} (a : α) : α :=
a
/-
The kernel definitional equality test (t =?= s) has special support for idDelta applications.
It implements the following rules
@ -29,6 +30,7 @@ theorems generated by the equation Compiler.
def idDelta {α : Sort u} (a : α) : α :=
a
/- `idRhs` is an auxiliary declaration used to implement "smart unfolding". It is used as a marker. -/
@[macro_inline, reducible]
def idRhs (α : Sort u) (a : α) : α :=
@ -107,6 +109,7 @@ def cast {α β : Sort u} (h : Eq α β) (a : α) : β :=
theorem congrArg {α : Sort u} {β : Sort v} {a₁ a₂ : α} (f : α → β) (h : Eq a₁ a₂) : Eq (f a₁) (f a₂) :=
h ▸ rfl
/-
Initialize the Quotient Module, which effectively adds the following definitions:
@ -165,6 +168,7 @@ inductive Bool : Type
export Bool (false true)
/- Remark: Subtype must take a Sort instead of Type because of the axiom strongIndefiniteDescription. -/
structure Subtype {α : Sort u} (p : α → Prop) :=
(val : α)
@ -190,6 +194,7 @@ def typedExpr (α : Sort u) (a : α) : α :=
def namedPattern {α : Sort u} (x a : α) : α :=
a
/- Auxiliary axiom used to implement `sorry`. -/
axiom sorryAx (α : Sort u) (synthetic := true) : α
@ -225,6 +230,7 @@ instance (α : Sort u) {β : α → Sort v} [(a : α) → Inhabited (β a)] : In
structure PLift (α : Sort u) : Type u := up ::
(down : α)
/- Bijection between α and PLift α -/
theorem PLift.upDown {α : Sort u} : ∀ (b : PLift α), Eq (up (down b)) b
| up a => rfl
@ -232,6 +238,7 @@ theorem PLift.upDown {α : Sort u} : ∀ (b : PLift α), Eq (up (down b)) b
theorem PLift.downUp {α : Sort u} (a : α) : Eq (down (up a)) a :=
rfl
/- Pointed types -/
structure PointedType :=
(type : Type u)
@ -244,6 +251,7 @@ instance : Inhabited PointedType.{u} :=
structure ULift.{r, s} (α : Type s) : Type (max s r) := up ::
(down : α)
/- Bijection between α and ULift.{v} α -/
theorem ULift.upDown {α : Type u} : ∀ (b : ULift.{v} α), Eq (up (down b)) b
| up a => rfl
@ -311,6 +319,7 @@ open BEq (beq)
instance {α : Type u} [DecidableEq α] : BEq α :=
⟨fun a b => decide (Eq a b)⟩
-- We use "dependent" if-then-else to be able to communicate the if-then-else condition
-- to the branches
@[macro_inline]

View file

@ -10,10 +10,12 @@
1 + 2 +
3
-- should propagate through multiple macro expansions
===>
binop% HAdd.hAdd✝ (1 + 2)
3
-- should propagate through multiple macro expansions
• [Term] 1 + 2 + 3 : Nat @ ⟨5, 7⟩†-⟨5, 16⟩† @ Lean.Elab.Term.Op.elabBinOp
• [Term] 1 + 2 + 3 : Nat @ ⟨5, 7⟩†-⟨5, 16⟩†
• [Term] 1 + 2 : Nat @ ⟨5, 7⟩-⟨5, 12⟩ @ «_aux_Init_Notation___macroRules_term_+__2»

165
tests/lean/run/3791.lean Normal file
View file

@ -0,0 +1,165 @@
import Lean
/-!
# Formatter: preserve hard newlines around comments (#3791)
This is a regression test for https://github.com/leanprover/lean4/issues/3791
The issue was that the formatter was not preserving whether there were hard newlines
around comments in Syntax.
Note: one should turn off the sanitizer when pretty printing user syntax,
since the sanitizer clears comments from all identifiers.
-/
open Lean
deriving instance Repr for Std.Format.FlattenBehavior, Format
elab "#parse_cmd " update?:("!")? dbg?:("?")? s:str : command => Elab.Command.liftTermElabM do
withOptions (fun opts => opts.set pp.sanitizeNames.name false) do
let s := s.getString
let mut cmd ← ofExcept <| Parser.runParserCategory (← getEnv) `command s
if update?.isSome then
cmd := cmd.updateLeading
let fmt ← PrettyPrinter.ppCommand ⟨cmd⟩
if dbg?.isSome then
dbg_trace "{repr cmd}"; dbg_trace "{repr fmt}"
logInfo (toString f!"{fmt}")
elab "#parse_term " update?:("!")? dbg?:("?")? s:str : command => Elab.Command.liftTermElabM do
withOptions (fun opts => opts.set pp.sanitizeNames.name false) do
let s := s.getString
let mut term ← ofExcept <| Parser.runParserCategory (← getEnv) `term s
if update?.isSome then
term := term.updateLeading
let fmt ← PrettyPrinter.ppTerm ⟨term⟩
if dbg?.isSome then
dbg_trace "{repr term}"; dbg_trace "{repr fmt}"
logInfo (toString f!"{fmt}")
/-!
Newline before and after comment, puts hard newlines before and after.
("commented out" comes from #3791; formerly `1 = 1` was being commented out by the comment
in the output.)
-/
/--
info: theorem ohno :
-- commented out:
1 = 1 :=
trivial
-/
#guard_msgs in #parse_cmd "theorem ohno : \n\
-- commented out:\n\
1 = 1 := trivial"
/-!
No newline before but a newline after, puts hard newline after.
-/
/--
info: theorem ohno : /- commented out: -/
1 = 1 :=
trivial
-/
#guard_msgs in #parse_cmd "theorem ohno : /- commented out: -/\n\
1 = 1 := trivial"
/-!
Now newlines before or after, puts no newlines.
-/
/--
info: theorem ohno : /- commented out: -/ 1 = 1 :=
trivial
-/
#guard_msgs in #parse_cmd "theorem ohno : /- commented out: -/ 1 = 1 := trivial"
/-!
Inserts whitespace before comment, but not a hard newline.
There's a hard newline after the comment.
-/
/--
info: ‹∀ x : , -- commented out:
x = x
-/
#guard_msgs in #parse_term "∀ x : ,-- commented out:\n\
x = x"
/-!
When it's wide enough, gets a discretionary newline after.
-/
/--
info: ‹∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, /- commented out: -/
x = x
-/
#guard_msgs in
#parse_term "∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,/- commented out: -/x = x"
/-!
When it's wide enough, gets a discretionary newline before.
-/
/--
info: ‹∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,
/- commented out: -/ x = x
-/
#guard_msgs in
#parse_term "∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,/- commented out: -/x = x"
/-!
Hard newlines before and after in terms.
-/
/--
info: ‹∀ x : ,
-- commented out:
x = x
-/
#guard_msgs in #parse_term "∀ x : , \n\
-- commented out:\n\
x = x"
/-!
No newlines before or after, but discretionary whitespace.
-/
/-- info: ‹∀ x : , /- commented out: -/ x = x -/
#guard_msgs in #parse_term "∀ x : , \
/- commented out: -/\
x = x"
/-!
Inline comments
-/
/-- info: [1, 2, 3 /- last comment -/ ] -/
#guard_msgs in #parse_term "[\n1,2,3 /- last comment -/]"
/--
info: [1, 2,
3 -- last comment
]
-/
#guard_msgs in #parse_term "[\n1,2,3 -- last comment\n]"
/-- info: (f /- c -/ x) -/
#guard_msgs in #parse_term "(f /- c -/ x)"
/-!
Comments after a term.
-/
/-- info: (x /-comment-/ ) -/
#guard_msgs in #parse_term "(x /-comment-/)"
/--
info: (x --comment
)
-/
#guard_msgs in #parse_term "(x --comment\n)"
/-!
Comments before a term. The `!` mode applies `updateLeading`, and the newline causes the comment
to be applied to the succeeding term.
-/
/--
info: (
/-comment-/ 1)
-/
#guard_msgs in #parse_term ! "(\n/-comment-/ 1)"
/--
info: (
-- comment
1)
-/
#guard_msgs in #parse_term ! "(\n-- comment\n 1)"

View file

@ -14,8 +14,9 @@ def checkDelab (e : Expr) (tgt? : Option Term) (name? : Option Name := none) : T
let stx ← delab e
match tgt? with
| some tgt =>
if toString (← PrettyPrinter.ppTerm stx) != toString (← PrettyPrinter.ppTerm tgt?.get!) then
throwError "{pfix} missed target\n{← PrettyPrinter.ppTerm stx}\n!=\n{← PrettyPrinter.ppTerm tgt}\n\nExpr: {e}\nType: {← inferType e}"
let tgt := tgt.raw.rewriteBottomUp Syntax.unsetTrailing
if toString (← PrettyPrinter.ppTerm stx) != toString (← PrettyPrinter.ppTerm ⟨tgt⟩) then
throwError "{pfix} missed target\n{← PrettyPrinter.ppTerm stx}\n!=\n{← PrettyPrinter.ppTerm ⟨tgt⟩}\n\nExpr: {e}\nType: {← inferType e}"
| _ => pure ()
let e' ←