diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index 35775eae87..dc5e8f2f40 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -31,7 +31,7 @@ structure Context where structure State where stxTrav : Syntax.Traverser /-- Textual content of `stack` up to the first whitespace (not enclosed in an escaped ident). We assume that the textual - content of `stack` is modified only by `pushText` and `pushLine`, so `leadWord` is adjusted there accordingly. -/ + content of `stack` is modified only by `push` and `pushWhitespace`, so `leadWord` is adjusted there accordingly. -/ leadWord : String := "" /-- When the `leadWord` is nonempty, whether it is an identifier. Identifiers get space inserted between them. -/ leadWordIdent : Bool := false @@ -351,18 +351,21 @@ def parseToken (s : String) : FormatterM ParserState := } ((← read).table) (Parser.mkParserState s) def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit := do - match info with - | SourceInfo.original _ _ ss _ => - -- preserve non-whitespace content (i.e. comments) + if let SourceInfo.original _ _ ss _ := info then + -- preserve non-whitespace content (comments) let ss' := ss.trim - if !ss'.isEmpty then - let ws := { ss with startPos := ss'.stopPos } - if ws.contains '\n' then - push s!"\n{ss'}" + unless ss'.isEmpty do + let preNL := Substring.contains { ss with stopPos := ss'.startPos } '\n' + let postNL := Substring.contains { ss with startPos := ss'.stopPos } '\n' + if postNL then + pushWhitespace "\n" + else if !(← get).leadWord.isEmpty then + pushLine + push ss'.toString + if preNL then + pushWhitespace "\n" else - push s!" {ss'}" - modify fun st => { st with leadWord := "", leadWordIdent := false } - | _ => pure () + pushLine let st ← get -- If there is no space between `tk` and the next word, see if we should insert a discretionary space. @@ -400,21 +403,27 @@ def pushToken (info : SourceInfo) (tk : String) (ident : Bool) : FormatterM Unit push tk -- preserve special whitespace for tokens like ":=\n" modify fun st => { st with leadWord := if tk.trimLeft == tk then tk else "", leadWordIdent := ident } - match info with - | SourceInfo.original ss _ _ _ => - -- preserve non-whitespace content (i.e. comments) + if let SourceInfo.original ss _ _ _ := info then + -- preserve non-whitespace content (comments) let ss' := ss.trim - if !ss'.isEmpty then - let ws := { ss with startPos := ss'.stopPos } - if ws.contains '\n' then do - -- Indentation is automatically increased when entering a category, but comments should be aligned - -- with the actual token, so dedent - indent (push s!"{ss'}\n") (some ((0:Int) - Std.Format.getIndent (← getOptions))) - else - pushLine - push ss'.toString - modify fun st => { st with leadWord := "" } - | _ => pure () + unless ss'.isEmpty do + let preNL := Substring.contains { ss with stopPos := ss'.startPos } '\n' + let postNL := Substring.contains { ss with startPos := ss'.stopPos } '\n' + -- Indentation is automatically increased when entering a category, but comments should be aligned + -- with the actual token, so dedent + indent (indent := some (-Std.Format.getIndent (← getOptions))) do + if postNL then + pushWhitespace "\n" + else + pushLine + pushWhitespace ss'.toString + if preNL then + pushWhitespace "\n" + else + -- It is conceivable that the start of comment syntax could be misinterpreted as part of a token, + -- so add the beginning of it as the leadWord. + let ctk := ss' |>.takeWhile (!·.isWhitespace) |>.toString + modify fun st => { st with leadWord := ctk } @[combinator_formatter symbolNoAntiquot] def symbolNoAntiquot.formatter (sym : String) : Formatter := do diff --git a/tests/lean/Reformat.lean b/tests/lean/Reformat.lean index de42de2ade..8261dc1802 100644 --- a/tests/lean/Reformat.lean +++ b/tests/lean/Reformat.lean @@ -7,21 +7,28 @@ open Lean.Elab.Term open Std.Format open Std unsafe def main (args : List String) : IO Unit := do -let (debug, f) : Bool × String := match args with - | [f, "-d"] => (true, f) - | [f] => (false, f) - | _ => panic! "usage: file [-d]" -let env ← mkEmptyEnvironment -let stx ← Lean.Parser.testParseFile env args.head! -let (f, _) ← (tryFinally (PrettyPrinter.ppModule stx) printTraces).toIO { options := Options.empty.setBool `trace.PrettyPrinter.format debug, fileName := "", fileMap := default } { env := env } -IO.print f -let stx' ← Lean.Parser.testParseModule env args.head! (toString f) -if stx' != stx then - let stx := stx.raw.getArg 1 - let stx' := stx'.raw.getArg 1 - stx.getArgs.size.forM fun i _ => do - if stx.getArg i != stx'.getArg i then - throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}" + let (debug, fn) : Bool × String := match args with + | [f, "-d"] => (true, f) + | [f] => (false, f) + | _ => panic! "usage: file [-d]" + let env ← mkEmptyEnvironment + let stx ← Lean.Parser.testParseFile env fn + let act : CoreM Format := do + withOptions (fun opts => + opts + -- Name sanitization clears inline comments attached to identifiers. + |>.set `pp.sanitizeNames false + |>.set `trace.PrettyPrinter.format debug) do + tryFinally (PrettyPrinter.ppModule stx) printTraces + let (f, _) ← act.toIO { fileName := "", fileMap := default } { env := env } + IO.print f + let stx' ← Lean.Parser.testParseModule env fn (toString f) + if stx' != stx then + let stx := stx.raw.getArg 1 + let stx' := stx'.raw.getArg 1 + stx.getArgs.size.forM fun i _ => do + if stx.getArg i != stx'.getArg i then + throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}" -- abbreviated Prelude.lean, which can be parsed without elaboration #eval main ["Reformat/Input.lean"] diff --git a/tests/lean/Reformat.lean.expected.out b/tests/lean/Reformat.lean.expected.out index bbd2a710a4..f1ef8630d2 100644 --- a/tests/lean/Reformat.lean.expected.out +++ b/tests/lean/Reformat.lean.expected.out @@ -11,6 +11,7 @@ universe u v w def id {α : Sort u} (a : α) : α := a + /- The kernel definitional equality test (t =?= s) has special support for idDelta applications. It implements the following rules @@ -29,6 +30,7 @@ theorems generated by the equation Compiler. def idDelta {α : Sort u} (a : α) : α := a + /- `idRhs` is an auxiliary declaration used to implement "smart unfolding". It is used as a marker. -/ @[macro_inline, reducible] def idRhs (α : Sort u) (a : α) : α := @@ -107,6 +109,7 @@ def cast {α β : Sort u} (h : Eq α β) (a : α) : β := theorem congrArg {α : Sort u} {β : Sort v} {a₁ a₂ : α} (f : α → β) (h : Eq a₁ a₂) : Eq (f a₁) (f a₂) := h ▸ rfl + /- Initialize the Quotient Module, which effectively adds the following definitions: @@ -165,6 +168,7 @@ inductive Bool : Type export Bool (false true) + /- Remark: Subtype must take a Sort instead of Type because of the axiom strongIndefiniteDescription. -/ structure Subtype {α : Sort u} (p : α → Prop) := (val : α) @@ -190,6 +194,7 @@ def typedExpr (α : Sort u) (a : α) : α := def namedPattern {α : Sort u} (x a : α) : α := a + /- Auxiliary axiom used to implement `sorry`. -/ axiom sorryAx (α : Sort u) (synthetic := true) : α @@ -225,6 +230,7 @@ instance (α : Sort u) {β : α → Sort v} [(a : α) → Inhabited (β a)] : In structure PLift (α : Sort u) : Type u := up :: (down : α) + /- Bijection between α and PLift α -/ theorem PLift.upDown {α : Sort u} : ∀ (b : PLift α), Eq (up (down b)) b | up a => rfl @@ -232,6 +238,7 @@ theorem PLift.upDown {α : Sort u} : ∀ (b : PLift α), Eq (up (down b)) b theorem PLift.downUp {α : Sort u} (a : α) : Eq (down (up a)) a := rfl + /- Pointed types -/ structure PointedType := (type : Type u) @@ -244,6 +251,7 @@ instance : Inhabited PointedType.{u} := structure ULift.{r, s} (α : Type s) : Type (max s r) := up :: (down : α) + /- Bijection between α and ULift.{v} α -/ theorem ULift.upDown {α : Type u} : ∀ (b : ULift.{v} α), Eq (up (down b)) b | up a => rfl @@ -311,6 +319,7 @@ open BEq (beq) instance {α : Type u} [DecidableEq α] : BEq α := ⟨fun a b => decide (Eq a b)⟩ + -- We use "dependent" if-then-else to be able to communicate the if-then-else condition -- to the branches @[macro_inline] diff --git a/tests/lean/binopInfoTree.lean.expected.out b/tests/lean/binopInfoTree.lean.expected.out index cd792ab5d6..147f279334 100644 --- a/tests/lean/binopInfoTree.lean.expected.out +++ b/tests/lean/binopInfoTree.lean.expected.out @@ -10,10 +10,12 @@ 1 + 2 + 3 -- should propagate through multiple macro expansions + ===> binop% HAdd.hAdd✝ (1 + 2) 3 -- should propagate through multiple macro expansions + • [Term] 1 + 2 + 3 : Nat @ ⟨5, 7⟩†-⟨5, 16⟩† @ Lean.Elab.Term.Op.elabBinOp • [Term] 1 + 2 + 3 : Nat @ ⟨5, 7⟩†-⟨5, 16⟩† • [Term] 1 + 2 : Nat @ ⟨5, 7⟩-⟨5, 12⟩ @ «_aux_Init_Notation___macroRules_term_+__2» diff --git a/tests/lean/run/3791.lean b/tests/lean/run/3791.lean new file mode 100644 index 0000000000..70e8515653 --- /dev/null +++ b/tests/lean/run/3791.lean @@ -0,0 +1,165 @@ +import Lean +/-! +# Formatter: preserve hard newlines around comments (#3791) + +This is a regression test for https://github.com/leanprover/lean4/issues/3791 + +The issue was that the formatter was not preserving whether there were hard newlines +around comments in Syntax. + +Note: one should turn off the sanitizer when pretty printing user syntax, +since the sanitizer clears comments from all identifiers. +-/ + +open Lean + +deriving instance Repr for Std.Format.FlattenBehavior, Format + +elab "#parse_cmd " update?:("!")? dbg?:("?")? s:str : command => Elab.Command.liftTermElabM do + withOptions (fun opts => opts.set pp.sanitizeNames.name false) do + let s := s.getString + let mut cmd ← ofExcept <| Parser.runParserCategory (← getEnv) `command s + if update?.isSome then + cmd := cmd.updateLeading + let fmt ← PrettyPrinter.ppCommand ⟨cmd⟩ + if dbg?.isSome then + dbg_trace "{repr cmd}"; dbg_trace "{repr fmt}" + logInfo (toString f!"‹{fmt}›") + +elab "#parse_term " update?:("!")? dbg?:("?")? s:str : command => Elab.Command.liftTermElabM do + withOptions (fun opts => opts.set pp.sanitizeNames.name false) do + let s := s.getString + let mut term ← ofExcept <| Parser.runParserCategory (← getEnv) `term s + if update?.isSome then + term := term.updateLeading + let fmt ← PrettyPrinter.ppTerm ⟨term⟩ + if dbg?.isSome then + dbg_trace "{repr term}"; dbg_trace "{repr fmt}" + logInfo (toString f!"‹{fmt}›") + +/-! +Newline before and after comment, puts hard newlines before and after. +("commented out" comes from #3791; formerly `1 = 1` was being commented out by the comment +in the output.) +-/ +/-- +info: ‹theorem ohno : + -- commented out: + 1 = 1 := + trivial› +-/ +#guard_msgs in #parse_cmd "theorem ohno : \n\ + -- commented out:\n\ + 1 = 1 := trivial" + +/-! +No newline before but a newline after, puts hard newline after. +-/ +/-- +info: ‹theorem ohno : /- commented out: -/ + 1 = 1 := + trivial› +-/ +#guard_msgs in #parse_cmd "theorem ohno : /- commented out: -/\n\ + 1 = 1 := trivial" + +/-! +Now newlines before or after, puts no newlines. +-/ +/-- +info: ‹theorem ohno : /- commented out: -/ 1 = 1 := + trivial› +-/ +#guard_msgs in #parse_cmd "theorem ohno : /- commented out: -/ 1 = 1 := trivial" + +/-! +Inserts whitespace before comment, but not a hard newline. +There's a hard newline after the comment. +-/ +/-- +info: ‹∀ x : ℕ, -- commented out: + x = x› +-/ +#guard_msgs in #parse_term "∀ x : ℕ,-- commented out:\n\ + x = x" + +/-! +When it's wide enough, gets a discretionary newline after. +-/ +/-- +info: ‹∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, /- commented out: -/ + x = x› +-/ +#guard_msgs in +#parse_term "∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,/- commented out: -/x = x" + +/-! +When it's wide enough, gets a discretionary newline before. +-/ +/-- +info: ‹∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx, + /- commented out: -/ x = x› +-/ +#guard_msgs in +#parse_term "∀ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,/- commented out: -/x = x" + +/-! +Hard newlines before and after in terms. +-/ +/-- +info: ‹∀ x : ℕ, + -- commented out: + x = x› +-/ +#guard_msgs in #parse_term "∀ x : ℕ, \n\ + -- commented out:\n\ + x = x" + +/-! +No newlines before or after, but discretionary whitespace. +-/ +/-- info: ‹∀ x : ℕ, /- commented out: -/ x = x› -/ +#guard_msgs in #parse_term "∀ x : ℕ, \ + /- commented out: -/\ + x = x" + +/-! +Inline comments +-/ +/-- info: ‹[1, 2, 3 /- last comment -/ ]› -/ +#guard_msgs in #parse_term "[\n1,2,3 /- last comment -/]" +/-- +info: ‹[1, 2, + 3 -- last comment + ]› +-/ +#guard_msgs in #parse_term "[\n1,2,3 -- last comment\n]" +/-- info: ‹(f /- c -/ x)› -/ +#guard_msgs in #parse_term "(f /- c -/ x)" + +/-! +Comments after a term. +-/ +/-- info: ‹(x /-comment-/ )› -/ +#guard_msgs in #parse_term "(x /-comment-/)" +/-- +info: ‹(x --comment + )› +-/ +#guard_msgs in #parse_term "(x --comment\n)" + +/-! +Comments before a term. The `!` mode applies `updateLeading`, and the newline causes the comment +to be applied to the succeeding term. +-/ +/-- +info: ‹( +/-comment-/ 1)› +-/ +#guard_msgs in #parse_term ! "(\n/-comment-/ 1)" +/-- +info: ‹( +-- comment +1)› +-/ +#guard_msgs in #parse_term ! "(\n-- comment\n 1)" diff --git a/tests/lean/run/PPTopDownAnalyze.lean b/tests/lean/run/PPTopDownAnalyze.lean index 5c34033539..770e2dfa44 100644 --- a/tests/lean/run/PPTopDownAnalyze.lean +++ b/tests/lean/run/PPTopDownAnalyze.lean @@ -14,8 +14,9 @@ def checkDelab (e : Expr) (tgt? : Option Term) (name? : Option Name := none) : T let stx ← delab e match tgt? with | some tgt => - if toString (← PrettyPrinter.ppTerm stx) != toString (← PrettyPrinter.ppTerm tgt?.get!) then - throwError "{pfix} missed target\n{← PrettyPrinter.ppTerm stx}\n!=\n{← PrettyPrinter.ppTerm tgt}\n\nExpr: {e}\nType: {← inferType e}" + let tgt := tgt.raw.rewriteBottomUp Syntax.unsetTrailing + if toString (← PrettyPrinter.ppTerm stx) != toString (← PrettyPrinter.ppTerm ⟨tgt⟩) then + throwError "{pfix} missed target\n{← PrettyPrinter.ppTerm stx}\n!=\n{← PrettyPrinter.ppTerm ⟨tgt⟩}\n\nExpr: {e}\nType: {← inferType e}" | _ => pure () let e' ←