From 2fe6d8a70b63eebcbcc240c60fa717f3f2b5e830 Mon Sep 17 00:00:00 2001 From: jrr6 <7482866+jrr6@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:56:19 -0400 Subject: [PATCH] feat: add word-level hint suggestion diffs (#8574) This PR adds an additional diff mode to the error-message hint suggestion widget that displays diffs per word rather than per character. --- src/Lean/Meta/Hint.lean | 189 +++++++++++++++++++--- tests/lean/run/hintSuggestionMessage.lean | 56 ++++++- tests/lean/run/hintWordDiff.lean | 102 ++++++++++++ tests/lean/run/simpUnusedArgs.lean | 2 +- 4 files changed, 323 insertions(+), 26 deletions(-) create mode 100644 tests/lean/run/hintWordDiff.lean diff --git a/src/Lean/Meta/Hint.lean b/src/Lean/Meta/Hint.lean index 09a5d9bcbd..ee0005f995 100644 --- a/src/Lean/Meta/Hint.lean +++ b/src/Lean/Meta/Hint.lean @@ -102,15 +102,33 @@ private def mkDiffString (ds : Array (Diff.Action × String)) : String := | (.skip , s) => s rangeStrs.foldl (· ++ ·) "" +/-- The granularity at which to display an inline diff for a suggested edit. -/ +inductive DiffGranularity where + /-- Automatically select diff granularity based on edit distance. -/ + | auto + /-- Character-level diff. -/ + | char + /-- Diff using whitespace-separated tokens. -/ + | word + /-- + "Monolithic" diff: shows a deletion of the entire existing source, followed by an insertion of the + entire suggestion. + -/ + | all + /-- A code action suggestion associated with a hint in a message. -Refer to `TryThis.Suggestion`; this extends that structure with a `span?` field, allowing a single -hint to suggest modifications at different locations. If `span?` is not specified, then the syntax -reference provided to `MessageData.hint` will be used. +Refer to `TryThis.Suggestion`. This extends that structure with the following fields: +* `span?`: the span at which this suggestion should apply. This allows a single hint to suggest + modifications at different locations. If `span?` is not specified, then the syntax reference + provided to `MessageData.hint` will be used. +* `diffGranularity`: the granularity at which the diff for this suggestion should be rendered in the + Infoview. See `DiffMode` for the possible granularities. This is `.auto` by default. -/ structure Suggestion extends toTryThisSuggestion : TryThis.Suggestion where span? : Option Syntax := none + diffGranularity : DiffGranularity := .auto instance : Coe TryThis.SuggestionText Suggestion where coe t := { suggestion := t } @@ -119,34 +137,159 @@ instance : ToMessageData Suggestion where toMessageData s := toMessageData s.toTryThisSuggestion /-- -Produces a diff that splits either on characters, tokens, or not at all, depending on the edit -distance between the arguments. +Produces a diff that splits either on characters, tokens, or not at all, depending on the selected +`diffMode`. Guarantees that all actions in the output will be maximally grouped; that is, instead of returning `#[(.insert, "a"), (.insert, "b")]`, it will return `#[(.insert, "ab")]`. -/ -partial def readableDiff (s s' : String) : Array (Diff.Action × String) := - -- TODO: add additional diff granularities - let minLength := min s.length s'.length - -- The coefficient on this value can be tuned: - let maxCharDiffDistance := minLength +partial def readableDiff (s s' : String) (granularity : DiffGranularity := .auto) : Array (Diff.Action × String) := + match granularity with + | .char => charDiff + | .word => wordDiff + | .all => maxDiff + | .auto => + let minLength := min s.length s'.length + -- The coefficients on these values can be tuned: + let maxCharDiffDistance := minLength / 5 + let maxWordDiffDistance := minLength / 2 + (max s.length s'.length) / 2 - let charDiff := Diff.diff (splitChars s) (splitChars s') - -- Note: this is just a rough heuristic, since the diff has no notion of substitution - let approxEditDistance := charDiff.filter (·.1 != .skip) |>.size - let preStrDiff := joinEdits charDiff - -- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the - -- front and back, or at a single interior point. This will always be fairly readable (and - -- splitting by a larger unit would likely only be worse). Otherwise, we should only use the - -- per-character diff if the edit distance isn't so large that it will be hard to read: - if preStrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then - preStrDiff.map fun (act, cs) => (act, String.mk cs.toList) - else - #[(.delete, s), (.insert, s')] + let charDiffRaw := Diff.diff (splitChars s) (splitChars s') + -- Note: this is just a rough heuristic, since the diff has no notion of substitution + let approxEditDistance := charDiffRaw.filter (·.1 != .skip) |>.size + let charArrDiff := joinEdits charDiffRaw + + -- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the + -- front and back, or at a single interior point. This will always be fairly readable (and + -- splitting by a larger unit would likely only be worse) + if charArrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then + charArrDiff.map fun (act, cs) => (act, String.mk cs.toList) + else if approxEditDistance ≤ maxWordDiffDistance then + wordDiff + else + maxDiff where + /- + Note on whitespace insertion: + Because we display diffs fully inline, we must trade off between accurately rendering changes to + whitespace and accurately previewing what will be inserted. We err on the side of the latter. + Within a "run" of deletions or insertions, we maintain the whitespace from the deleted/inserted + text and mark it as a deletion/insertion. After an unchanged word or a substitution (i.e., a + deletion and insertion without an intervening unchanged word), we show a whitespace diff iff the + old whitespace did not contain a line break (as rendering a deleted newline still visually + suggests a line break in the new output); otherwise, we use the whitespace from the new version + but mark it as unchanged, since there was also whitespace here originally too. Within a + substitution, we omit whitespace entirely. After an insertion, we show the new whitespace and mark + it as an insertion. After a deletion, we render the old whitespace as a deletion unless it + contains a newline, for the same reason mentioned previously. + -/ + wordDiff := Id.run do + let (words, wss) := splitWords s + let (words', wss') := splitWords s' + let diff := Diff.diff words words' + let mut withWs := #[] + let mut (wssIdx, wss'Idx) := (0, 0) + let mut inSubst := false + for h : diffIdx in [:diff.size] do + let (a₁, s₁) := diff[diffIdx] + withWs := withWs.push (a₁, s₁) + if let some (a₂, s₂) := diff[diffIdx + 1]? then + match a₁, a₂ with + | .skip, .delete => + -- Unchanged word: show whitespace diff unless this is followed by a deleted terminal + -- substring of the old, in which case show the old whitespace (since there is no new) + let ws := wss[wssIdx]! + let wsDiff := if let some ws' := wss'[wss'Idx]? then + mkWhitespaceDiff ws ws' + else + #[(.delete, ws)] + withWs := withWs ++ wsDiff + wssIdx := wssIdx + 1 + wss'Idx := wss'Idx + 1 + | .skip, .skip | .skip, .insert => + -- Unchanged word: inverse of the above case: new has whitespace here, and old does too so + -- long as we haven't reached an appended terminal new portion + let ws' := wss'[wss'Idx]! + let wsDiff := if let some ws := wss[wssIdx]? then + mkWhitespaceDiff ws ws' + else + #[(.insert, ws')] + withWs := withWs ++ wsDiff + wssIdx := wssIdx + 1 + wss'Idx := wss'Idx + 1 + | .insert, .insert => + -- Insertion separator: include whitespace, and mark it as inserted + let ws := wss'[wss'Idx]! + withWs := withWs.push (.insert, ws) + wss'Idx := wss'Idx + 1 + | .insert, .skip => + -- End of insertion: if this was a substitution, new and old have whitespace here; if it + -- wasn't, only new has whitespace here + let ws' := wss'[wss'Idx]! + let wsDiff := if inSubst then + mkWhitespaceDiff wss[wssIdx]! ws' + else + #[(.insert, ws')] + withWs := withWs ++ wsDiff + wss'Idx := wss'Idx + 1 + if inSubst then wssIdx := wssIdx + 1 + inSubst := false + | .delete, .delete => + -- Deletion separator: include and mark as deleted + let ws := wss[wssIdx]! + withWs := withWs.push (.delete, ws) + wssIdx := wssIdx + 1 + | .delete, .skip => + -- End of deletion: include the deletion's whitespace as deleted iff it is not a newline + -- (see earlier note); in principle, we should never have a substitution ending with a + -- deletion (`diff` should prefer `a̵b̲` to `b̲a̵`), but we handle this in case `diff` changes + let ws := wss[wssIdx]! + unless inSubst || ws.contains '\n' do + withWs := withWs.push (.delete, ws) + wssIdx := wssIdx + 1 + if inSubst then wss'Idx := wss'Idx + 1 + inSubst := false + | .insert, .delete | .delete, .insert => + -- "Substitution point": don't include any whitespace, since we're switching this word + inSubst := true + withWs + |> joinEdits + |>.map fun (act, ss) => (act, ss.foldl (· ++ ·) "") + + charDiff := + Diff.diff (splitChars s) (splitChars s') |> joinCharDiff + + /-- Given a `Char` diff, produces an equivalent `String` diff, joining actions of the same kind. -/ + joinCharDiff (d : Array (Diff.Action × Char)) := + joinEdits d |>.map fun (act, cs) => (act, String.mk cs.toList) + + maxDiff := + #[(.delete, s), (.insert, s')] + + mkWhitespaceDiff (oldWs newWs : String) := + if !oldWs.contains '\n' then + Diff.diff oldWs.data.toArray newWs.data.toArray |> joinCharDiff + else + #[(.skip, newWs)] + splitChars (s : String) : Array Char := s.toList.toArray + splitWords (s : String) : Array String × Array String := + splitWordsAux s 0 0 #[] #[] + + splitWordsAux (s : String) (b : String.Pos) (i : String.Pos) (r ws : Array String) : Array String × Array String := + if h : s.atEnd i then + (r.push (s.extract b i), ws) + else + have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (String.lt_next s _) + if (s.get i).isWhitespace then + let skipped := (Substring.mk s i s.endPos).takeWhile (·.isWhitespace) + let i' := skipped.stopPos + splitWordsAux s i' i' (r.push (s.extract b i)) (ws.push (s.extract i i')) + else + splitWordsAux s b (s.next i) r ws + joinEdits {α} (ds : Array (Diff.Action × α)) : Array (Diff.Action × Array α) := ds.foldl (init := #[]) fun acc (act, c) => if h : acc.isEmpty then @@ -178,7 +321,7 @@ def mkSuggestionsMessage (suggestions : Array Suggestion) let suggestionText := suggestionArr[0]!.2.1 let map ← getFileMap let rangeContents := Substring.mk map.source range.start range.stop |>.toString - let edits := readableDiff rangeContents suggestionText + let edits := readableDiff rangeContents suggestionText suggestion.diffGranularity let diffJson := mkDiffJson edits let json := json% { diff: $diffJson, diff --git a/tests/lean/run/hintSuggestionMessage.lean b/tests/lean/run/hintSuggestionMessage.lean index e056ed84fb..0f26fbd079 100644 --- a/tests/lean/run/hintSuggestionMessage.lean +++ b/tests/lean/run/hintSuggestionMessage.lean @@ -194,7 +194,7 @@ error: Invalid Hint: Use one of these instead • first second thir̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵ - • f̵i̵r̵st̵ ̵s̵econd t̵h̵i̵r̵d̵ ̵fourth fifth + • f̵i̵r̵s̵t̵ ̵second t̵h̵i̵r̵d̵ ̵fourth fifth • f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵f̲i̲ ̲s̲e̲ ̲t̲h̲ ̲f̲o̲ ̲f̲i̲ • f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵s̲e̲c̲o̲n̲d̲ -/ @@ -211,7 +211,7 @@ error: Invalid Hint: Try one of these • let x := 4̵2̵ - ̵3̲1̲;̲ ̵2̵ ̵*̵ ̵x + ̵ ̵ ̵2̵ ̵*̵3̲1̲;̲ x • l̵e̵t̵ ̵x̵ ̵:̵=̵ ̵4̵2̵ ̵ ̵ ̵2̵ ̵*̵ ̵x̵l̲e̲t̲ ̲(̲a̲,̲ ̲b̲,̲ ̲c̲)̲ ̲:̲=̲ ̲(̲1̲,̲ ̲2̲,̲ ̲3̲)̲ ̲ ̲ ̲b̲ @@ -222,3 +222,55 @@ Hint: Try one of these #check suggest_replacement% let x := 42 2 * x + +/-! ## Forced diff granularities -/ + +namespace DiffGranularity +scoped syntax "select " "granularity " term : command + +open Elab in +elab_rules : command + | `(command| select granularity $t) => do + let tp := mkConst ``DiffGranularity + let g ← unsafe Command.runTermElabM (fun _ => + Term.elabTerm t tp >>= monadLift ∘ evalExpr DiffGranularity tp) + let hint ← Command.liftCoreM <| MessageData.hint m!"Hint" #[{ + suggestion := + if g matches .all then + -- Ensure we wouldn't have used `.all` by default + "selected granularity .all" + else + "many granularity words not matching the source" + diffGranularity := g + }] + logInfo hint + +/-- +info: + +Hint: Hint + s̵e̵m̲a̲n̲y̲ ̲g̲r̲a̲n̲u̲le̵c̵a̲r̲i̲ty̲ w̲o̲r̲d̲s̲ ̲n̲o̲t̲ ̲m̲a̲t̲c̲h̲i̲n̲gr̵a̵n̵u̵l̵a̵r̵i̵ ̲ty̵h̲e̲ .̵c̵h̵a̵s̲o̲u̲rc̲e̲ +-/ +#guard_msgs (whitespace := exact) in +select granularity .char + +/-- +info: + +Hint: Hint + s̵e̵l̵e̵c̵t̵m̲a̲n̲y̲ granularity .̵w̵o̵r̵d̵w̲o̲r̲d̲s̲ ̲n̲o̲t̲ ̲m̲a̲t̲c̲h̲i̲n̲g̲ ̲t̲h̲e̲ ̲s̲o̲u̲r̲c̲e̲ +-/ +#guard_msgs (whitespace := exact) in +select granularity .word + +/-- +info: + +Hint: Hint + s̵e̵l̵e̵c̵t̵ ̵g̵r̵a̵n̵u̵l̵a̵r̵i̵t̵y̵ ̵.̵a̵l̵l̵s̲e̲l̲e̲c̲t̲e̲d̲ ̲g̲r̲a̲n̲u̲l̲a̲r̲i̲t̲y̲ ̲.̲a̲l̲l̲ +-/ +#guard_msgs (whitespace := exact) in +select granularity .all + + +end DiffGranularity diff --git a/tests/lean/run/hintWordDiff.lean b/tests/lean/run/hintWordDiff.lean new file mode 100644 index 0000000000..b9c48b2c5b --- /dev/null +++ b/tests/lean/run/hintWordDiff.lean @@ -0,0 +1,102 @@ +import Lean.Meta.Hint + +/-! # Word-level diffs in hint suggestions + +Word-level diffs in hint suggestions require careful display of whitespace, since it is frequently +impossible to both accurately render both deleted and inserted whitespace while also displaying a +snippet that recognizably maps to what will be inserted. These tests exercise the behavior of the +word-level diff generator to ensure that it is reasonable. +-/ + +open Lean Meta Hint + +-- We reproduce this function here because it is private in `Lean.Meta.Hint` +private def mkDiffString (ds : Array (Diff.Action × String)) : String := + let rangeStrs := ds.map fun + | (.insert, s) => String.mk (s.data.flatMap ([·, '\u0332'])) -- U+0332 Combining Low Line + | (.delete, s) => String.mk (s.data.flatMap ([·, '\u0335'])) -- U+0335 Combining Short Stroke Overlay + | (.skip , s) => s + rangeStrs.foldl (· ++ ·) "" + +/-! ## Test Word Diff Behavior -/ + +/-- info: "one two ̲t̲h̲r̲e̲e̲" -/ +#guard_msgs in +#eval readableDiff.wordDiff "one two" "one two three" |> mkDiffString + +/-- info: "o̵n̵e̵a̲ two ̲t̲h̲r̲e̲e̲" -/ +#guard_msgs in +#eval readableDiff.wordDiff "one\ntwo" "a two three" |> mkDiffString + +/-- info: "o̵n̵e̵a̲ ̵\n̲two ̲t̲h̲r̲e̲e̲" -/ +#guard_msgs in +#eval readableDiff.wordDiff "one two" "a\ntwo three" |> mkDiffString + +/-- info: "one two" -/ +#guard_msgs in +#eval readableDiff.wordDiff "one\ntwo" "one two" |> mkDiffString + +/-- info: "one t̵w̵o̵ ̵three" -/ +#guard_msgs in +#eval readableDiff.wordDiff "one two three" "one three" |> mkDiffString + +/-- info: "a ̵b" -/ +#guard_msgs in +#eval readableDiff.wordDiff "a b" "a b" |> mkDiffString + +/-- info: "a ̵b̵" -/ +#guard_msgs in +#eval readableDiff.wordDiff "a b" "a" |> mkDiffString + +/-- info: "a̵b c" -/ +#guard_msgs in +#eval readableDiff.wordDiff "a\nb c" "b c" |> mkDiffString + +/-- +info: "a l̵o̵n̵g̵e̵r̵l̲o̲n̲g̲\tstring w̵i̵t̵h̵ ̵ ̵ ̵w̵h̵i̵t̵e̵s̵p̵a̵c̵e̵in strange ̵\n̲\n̲p̵l̵a̵c̵e̵s̵\n̵a̵n̵d̵ ̵u̵n̵u̵s̵u̵a̵l̵spaces" +-/ +#guard_msgs in +#eval readableDiff.wordDiff + "a longer\nstring with whitespace\n in strange\nplaces and\n\nunusual\tspaces" + "a long\tstring in strange\n\nspaces" + |> mkDiffString + +/-- +info: def f (̵x̵ ̵:̵ ̵N̵a̵t̵)̵x̲ := x + 1 +#check let c̵x̲ := 3̵1̵5̲ in + f c̵x̲ +-/ +#guard_msgs in +#eval IO.println <| readableDiff.wordDiff + "def f (x : Nat) := x + 1\n#check let c := 31 in\n f c" + "def f x := x + 1\n#check let x := 5 in\n f x" + |> mkDiffString + +/-! ## Test Word Granularity Selection -/ + +/-- info: "simp [mergeTR.go, m̵e̵r̵g̵e̵,̵ ̵reverseAux_eq]" -/ +#guard_msgs in +#eval readableDiff + "simp [mergeTR.go, merge, reverseAux_eq]" "simp [mergeTR.go, reverseAux_eq]" |> mkDiffString + +/-- +info: "r̵w̵s̲i̲m̲p̲ only [̵m̵e̵r̵g̵e̵T̵R̵.̵g̵o̵]̵[̲m̲e̲r̲g̲e̲T̲R̲.̲g̲o̲,̲ ̲m̲e̲r̲g̲e̲,̲ ̲r̲e̲v̲e̲r̲s̲e̲A̲u̲x̲_̲e̲q̲]̲" +-/ +#guard_msgs in +#eval readableDiff + "rw only [mergeTR.go]" + "simp only [mergeTR.go, merge, reverseAux_eq]" |> mkDiffString + +/-! ## Test Whitespace-Diff Handling -/ + +/-- info: " ̵rw ̵ ̵ ̵only [h]" -/ +#guard_msgs in +#eval readableDiff.wordDiff " rw only [h]" " rw only [h]" |> mkDiffString + +/-- info: " ̲ ̲w1 w2 ̵ ̵ ̵\t̲w3 ̲ ̲ ̲ ̲ ̲ ̲ ̲w̲4̲" -/ +#guard_msgs in +#eval readableDiff.wordDiff "w1\nw2 w3" " w1 w2\tw3 w4" |> mkDiffString + +/-- info: " ̵ ̵x ̲ ̲ ̲ ̲ ̲y̵ ̵ ̵ ̵z ̵" -/ +#guard_msgs in +#eval readableDiff.wordDiff " x y z " "x z" |> mkDiffString diff --git a/tests/lean/run/simpUnusedArgs.lean b/tests/lean/run/simpUnusedArgs.lean index 8616d55858..da51fdbd2b 100644 --- a/tests/lean/run/simpUnusedArgs.lean +++ b/tests/lean/run/simpUnusedArgs.lean @@ -72,7 +72,7 @@ warning: This simp argument is unused: (some_def.eq_def) Hint: Omit it from the simp argument list. - simp [some_def, (̵some_def.eq_def)̵,̵ ̵s̵o̵m̵e̵_̵d̵e̵f̵.̵e̵q̵_̵d̵e̵f̵] + simp [some_def, (̵s̵o̵m̵e̵_̵d̵e̵f̵.̵e̵q̵_̵d̵e̵f̵)̵,̵ ̵some_def.eq_def] note: this linter can be disabled with `set_option linter.unusedSimpArgs false` --- warning: This simp argument is unused: