diff --git a/src/Lean/Elab/DocString/Builtin.lean b/src/Lean/Elab/DocString/Builtin.lean index 093dbaa447..42a977ff63 100644 --- a/src/Lean/Elab/DocString/Builtin.lean +++ b/src/Lean/Elab/DocString/Builtin.lean @@ -917,7 +917,11 @@ def lean (name : Option Ident := none) (error warning : flag false) («show» : let scopes := (← get).scopes let (cmds, cmdState, trees) ← withSaveInfoContext do let mut cmdState : Command.State := { env, maxRecDepth := ← MonadRecDepth.getMaxRecDepth, scopes } - let mut pstate : Parser.ModuleParserState := {pos := pos, recovering := false} + let mut pstate : Parser.ModuleParserState := { + pos + recovering := false + hasLeading := false + } let mut cmds := #[] repeat let scope := cmdState.scopes.head! diff --git a/src/Lean/Parser/Module.lean b/src/Lean/Parser/Module.lean index cd82f7496c..61d8f4e236 100644 --- a/src/Lean/Parser/Module.lean +++ b/src/Lean/Parser/Module.lean @@ -26,7 +26,12 @@ end Module structure ModuleParserState where pos : String.Pos.Raw := 0 - recovering : Bool := false + recovering : Bool := false + /-- + Whether there is leading whitespace before `pos`. + Used to associate whitespace at the start of the file with the first token of the file. + -/ + hasLeading : Bool := true deriving Inhabited private partial def mkErrorMessage (c : InputContext) (pos : String.Pos.Raw) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do @@ -60,6 +65,12 @@ where if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing) else none +private def setStartOfFileLeading (stx : Syntax) : Syntax × Bool := Id.run do + let some (.original leading pos trailing endPos) ← stx.getHeadInfo? + | return (stx, false) + let info := .original { leading with startPos := 0 } pos trailing endPos + return (stx.setHeadInfo info, true) + def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × ModuleParserState × MessageLog) := do let dummyEnv ← mkEmptyEnvironment let p := andthenFn whitespace Module.header.fn @@ -96,10 +107,12 @@ def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × Modul consider using separate `public import {mod}` and `import all {mod}` directives \ in order to import public data into the public scope and private data into the \ private scope." - pure (⟨stx⟩, {pos := s.pos, recovering := s.hasError}, messages) + let (stx, isLeadingSet) := setStartOfFileLeading stx + pure (⟨stx⟩, { pos := s.pos, recovering := s.hasError, hasLeading := ! isLeadingSet }, messages) -private def mkEOI (pos : String.Pos.Raw) : Syntax := - let atom := mkAtom (SourceInfo.original "".toRawSubstring pos "".toRawSubstring pos) "" +private def mkEOI (inputCtx : InputContext) (pos : String.Pos.Raw) : Syntax := + let emptyAt := inputCtx.substring pos pos + let atom := mkAtom (SourceInfo.original emptyAt pos emptyAt pos) "" mkNode ``Command.eoi #[atom] def isTerminalCommand (s : Syntax) : Bool := @@ -122,7 +135,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) let mut stx := Syntax.missing -- will always be assigned below repeat if inputCtx.atEnd pos then - stx := mkEOI pos + stx := mkEOI inputCtx pos break let pos' := pos let p := andthenFn whitespace topLevelCommandParserFn @@ -155,7 +168,10 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) else stx := s.stxStack.back break - return (stx, { pos, recovering }, messages) + if mps.hasLeading then + -- File header was empty - set start-of-file leading whitespace on first token in `stx` + (stx, _) := setStartOfFileLeading stx + return (stx, { pos, recovering, hasLeading := false }, messages) -- only useful for testing since most Lean files cannot be parsed without elaboration @@ -165,7 +181,7 @@ partial def testParseModuleAux (env : Environment) (inputCtx : InputContext) (s | (stx, state, msgs) => if isTerminalCommand stx then if !msgs.hasUnreported then - pure stxs + pure (stxs.push stx) else do msgs.forM fun msg => msg.toString >>= IO.println throw (IO.userError "failed to parse file") @@ -173,14 +189,14 @@ partial def testParseModuleAux (env : Environment) (inputCtx : InputContext) (s parse state msgs (stxs.push stx) parse s msgs stxs -def testParseModule (env : Environment) (fname contents : String) : IO (TSyntax ``Parser.Module.module) := do +def testParseModule (env : Environment) (fname contents : String) : IO Syntax := do let inputCtx := mkInputContext contents fname let (header, state, messages) ← parseHeader inputCtx let cmds ← testParseModuleAux env inputCtx state messages #[] let stx := mkNode `Lean.Parser.Module.module #[header, mkListNode cmds] - pure ⟨stx.raw.updateLeading⟩ + pure stx -def testParseFile (env : Environment) (fname : System.FilePath) : IO (TSyntax ``Parser.Module.module) := do +def testParseFile (env : Environment) (fname : System.FilePath) : IO Syntax := do let contents ← IO.FS.readFile fname testParseModule env fname.toString contents diff --git a/tests/elab/Reformat.lean b/tests/elab/Reformat.lean index 544b85e500..9a16c73ce9 100644 --- a/tests/elab/Reformat.lean +++ b/tests/elab/Reformat.lean @@ -12,20 +12,27 @@ unsafe def main (args : List String) : IO Unit := do | [f] => (false, f) | _ => panic! "usage: file [-d]" let env ← mkEmptyEnvironment - let stx ← Lean.Parser.testParseFile env fn + let stx₀ ← Lean.Parser.testParseFile env fn + -- `testParseFile` matches the real pipeline (no `updateLeading`), but reformatting + -- needs leading info to correctly place inter-declaration comments, just like + -- `script/reformat.lean` which explicitly calls `updateLeading`. + let stx : Syntax := stx₀.updateLeading let act : CoreM Format := do withOptions (fun opts => opts -- Name sanitization clears inline comments attached to identifiers. |>.set `pp.sanitizeNames false |>.set `trace.PrettyPrinter.format debug) do - tryFinally (PrettyPrinter.ppModule stx) printTraces + tryFinally (PrettyPrinter.ppModule ⟨stx⟩) printTraces let (f, _) ← act.toIO { fileName := "", fileMap := default } { env := env } + -- `testParseModule` includes the EOI token in the command list (matching the real + -- pipeline), so `ppModule` emits two trailing newlines via the `ppLine >> ppLine` + -- separator for EOI's empty command text. IO.print f let stx' ← Lean.Parser.testParseModule env fn (toString f) if stx' != stx then - let stx := stx.raw.getArg 1 - let stx' := stx'.raw.getArg 1 + let stx := stx.getArg 1 + let stx' := stx'.getArg 1 stx.getArgs.size.forM fun i _ => do if stx.getArg i != stx'.getArg i then throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}" diff --git a/tests/elab/Reformat.lean.out.expected b/tests/elab/Reformat.lean.out.expected index f1ef8630d2..2f5a1d8483 100644 --- a/tests/elab/Reformat.lean.out.expected +++ b/tests/elab/Reformat.lean.out.expected @@ -326,3 +326,5 @@ instance {α : Type u} [DecidableEq α] : BEq α := def dite {α : Sort u} (c : Prop) [h : Decidable c] (t : c → α) (e : Not c → α) : α := Decidable.casesOn (motive := fun _ => α) h e t + + diff --git a/tests/elab/Reparen.lean b/tests/elab/Reparen.lean index f9a95be3d6..765e0c91e5 100644 --- a/tests/elab/Reparen.lean +++ b/tests/elab/Reparen.lean @@ -33,10 +33,13 @@ let (debug, f) : Bool × String := match args with | _ => panic! "usage: file [-d]"; let env ← mkEmptyEnvironment; let stx ← Lean.Parser.testParseFile env args.head!; -let header := stx.raw.getArg 0; +-- `testParseFile` matches the real pipeline (no `updateLeading`), but reprinting +-- needs leading info to correctly place inter-declaration whitespace. +let stx := stx.updateLeading; +let header := stx.getArg 0; let some s ← pure header.reprint | throw $ IO.userError "header reprint failed"; IO.print s; -let cmds := (stx.raw.getArg 1).getArgs; +let cmds := (stx.getArg 1).getArgs; cmds.forM $ fun cmd => do let cmd := unparen cmd; let (cmd, _) ← (tryFinally (PrettyPrinter.parenthesizeCommand cmd) printTraces).toIO { options := Options.empty.set `trace.PrettyPrinter.parenthesize debug, fileName := "", fileMap := default } { env := env }; diff --git a/tests/lean/run/moduleHeaderLeadingWhitespace.lean b/tests/lean/run/moduleHeaderLeadingWhitespace.lean new file mode 100644 index 0000000000..ac11f46942 --- /dev/null +++ b/tests/lean/run/moduleHeaderLeadingWhitespace.lean @@ -0,0 +1,95 @@ +-- Tests that file-start whitespace and comments are associated with the leading +-- of the first actual token in the file (in the module header or first command). +import Lean + +open Lean Parser + +/-- Parse a header string and return the leading Substring.Raw of the first token. -/ +def getFirstTokenLeading (input : String) : IO (Option Substring.Raw) := do + let inputCtx := mkInputContext input "" + let (header, _, _) ← parseHeader inputCtx + return match header.raw.getHeadInfo? with + | some (.original leading ..) => some leading + | _ => none + +-- Test 1: file-start line comment before import +-- The leading of the `import` keyword should start at position 0 and contain the comment. +/-- +info: leading startPos: 0 +leading content: -- file-start comment + -/ +#guard_msgs in +#eval do + let some leading ← getFirstTokenLeading "-- file-start comment\nimport Foo\n" + | IO.println "no leading found" + IO.println s!"leading startPos: {leading.startPos.byteIdx}" + IO.println s!"leading content: {Substring.Raw.toString leading}" + +-- Test 2: file-start block comment before import +/-- +info: leading startPos: 0 +leading content: /- block comment -/ + -/ +#guard_msgs in +#eval do + let some leading ← getFirstTokenLeading "/- block comment -/\nimport Foo\n" + | IO.println "no leading found" + IO.println s!"leading startPos: {leading.startPos.byteIdx}" + IO.println s!"leading content: {Substring.Raw.toString leading}" + +-- Test 3: no file-start whitespace (import is the very first thing) +-- The leading should start at position 0 with stopPos 0 (empty leading). +/-- +info: leading startPos: 0 +leading stopPos: 0 -/ +#guard_msgs in +#eval do + let some leading ← getFirstTokenLeading "import Foo\n" + | IO.println "no leading found" + IO.println s!"leading startPos: {leading.startPos.byteIdx}" + IO.println s!"leading stopPos: {leading.stopPos.byteIdx}" + +-- Test 4: file with only newlines at the start (no comments) +/-- +info: leading startPos: 0 +leading content: + + -/ +#guard_msgs in +#eval do + let some leading ← getFirstTokenLeading "\n\nimport Foo\n" + | IO.println "no leading found" + IO.println s!"leading startPos: {leading.startPos.byteIdx}" + IO.println s!"leading content: {Substring.Raw.toString leading}" + +-- Test 5: entirely empty file (only a comment, no header or body) +-- The EOI token's leading should include the file-start comment. +/-- +info: first token leading startPos: 0 +first token leading content: -- just a comment + -/ +#guard_msgs in +#eval do + let env ← mkEmptyEnvironment + let stx ← testParseModule env "" "-- just a comment\n" + match stx.getHeadInfo? with + | some (.original leading ..) => + IO.println s!"first token leading startPos: {leading.startPos.byteIdx}" + IO.println s!"first token leading content: {Substring.Raw.toString leading}" + | _ => IO.println "no original leading found" + +-- Test 6: empty header (no imports) - file-start comment before a def +-- The leading of the first command token should include the file-start comment. +/-- +info: first token leading startPos: 0 +first token leading content: -- file-start comment + -/ +#guard_msgs in +#eval do + let env ← mkEmptyEnvironment + let stx ← testParseModule env "" "-- file-start comment\ndef foo := 1\n" + match stx.getHeadInfo? with + | some (.original leading ..) => + IO.println s!"first token leading startPos: {leading.startPos.byteIdx}" + IO.println s!"first token leading content: {Substring.Raw.toString leading}" + | _ => IO.println "no original leading found"