feat: add word-level hint suggestion diffs (#8574)

This PR adds an additional diff mode to the error-message hint
suggestion widget that displays diffs per word rather than per
character.
This commit is contained in:
jrr6 2025-06-26 19:56:19 -04:00 committed by GitHub
parent b1a306cf69
commit 2fe6d8a70b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 323 additions and 26 deletions

View file

@ -102,15 +102,33 @@ private def mkDiffString (ds : Array (Diff.Action × String)) : String :=
| (.skip , s) => s
rangeStrs.foldl (· ++ ·) ""
/-- The granularity at which to display an inline diff for a suggested edit. -/
inductive DiffGranularity where
/-- Automatically select diff granularity based on edit distance. -/
| auto
/-- Character-level diff. -/
| char
/-- Diff using whitespace-separated tokens. -/
| word
/--
"Monolithic" diff: shows a deletion of the entire existing source, followed by an insertion of the
entire suggestion.
-/
| all
/--
A code action suggestion associated with a hint in a message.
Refer to `TryThis.Suggestion`; this extends that structure with a `span?` field, allowing a single
hint to suggest modifications at different locations. If `span?` is not specified, then the syntax
reference provided to `MessageData.hint` will be used.
Refer to `TryThis.Suggestion`. This extends that structure with the following fields:
* `span?`: the span at which this suggestion should apply. This allows a single hint to suggest
modifications at different locations. If `span?` is not specified, then the syntax reference
provided to `MessageData.hint` will be used.
* `diffGranularity`: the granularity at which the diff for this suggestion should be rendered in the
Infoview. See `DiffMode` for the possible granularities. This is `.auto` by default.
-/
structure Suggestion extends toTryThisSuggestion : TryThis.Suggestion where
span? : Option Syntax := none
diffGranularity : DiffGranularity := .auto
instance : Coe TryThis.SuggestionText Suggestion where
coe t := { suggestion := t }
@ -119,34 +137,159 @@ instance : ToMessageData Suggestion where
toMessageData s := toMessageData s.toTryThisSuggestion
/--
Produces a diff that splits either on characters, tokens, or not at all, depending on the edit
distance between the arguments.
Produces a diff that splits either on characters, tokens, or not at all, depending on the selected
`diffMode`.
Guarantees that all actions in the output will be maximally grouped; that is, instead of returning
`#[(.insert, "a"), (.insert, "b")]`, it will return `#[(.insert, "ab")]`.
-/
partial def readableDiff (s s' : String) : Array (Diff.Action × String) :=
-- TODO: add additional diff granularities
let minLength := min s.length s'.length
-- The coefficient on this value can be tuned:
let maxCharDiffDistance := minLength
partial def readableDiff (s s' : String) (granularity : DiffGranularity := .auto) : Array (Diff.Action × String) :=
match granularity with
| .char => charDiff
| .word => wordDiff
| .all => maxDiff
| .auto =>
let minLength := min s.length s'.length
-- The coefficients on these values can be tuned:
let maxCharDiffDistance := minLength / 5
let maxWordDiffDistance := minLength / 2 + (max s.length s'.length) / 2
let charDiff := Diff.diff (splitChars s) (splitChars s')
-- Note: this is just a rough heuristic, since the diff has no notion of substitution
let approxEditDistance := charDiff.filter (·.1 != .skip) |>.size
let preStrDiff := joinEdits charDiff
-- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the
-- front and back, or at a single interior point. This will always be fairly readable (and
-- splitting by a larger unit would likely only be worse). Otherwise, we should only use the
-- per-character diff if the edit distance isn't so large that it will be hard to read:
if preStrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
preStrDiff.map fun (act, cs) => (act, String.mk cs.toList)
else
#[(.delete, s), (.insert, s')]
let charDiffRaw := Diff.diff (splitChars s) (splitChars s')
-- Note: this is just a rough heuristic, since the diff has no notion of substitution
let approxEditDistance := charDiffRaw.filter (·.1 != .skip) |>.size
let charArrDiff := joinEdits charDiffRaw
-- Given that `Diff.diff` returns a minimal diff, any length-≤3 diff can only have edits at the
-- front and back, or at a single interior point. This will always be fairly readable (and
-- splitting by a larger unit would likely only be worse)
if charArrDiff.size ≤ 3 || approxEditDistance ≤ maxCharDiffDistance then
charArrDiff.map fun (act, cs) => (act, String.mk cs.toList)
else if approxEditDistance ≤ maxWordDiffDistance then
wordDiff
else
maxDiff
where
/-
Note on whitespace insertion:
Because we display diffs fully inline, we must trade off between accurately rendering changes to
whitespace and accurately previewing what will be inserted. We err on the side of the latter.
Within a "run" of deletions or insertions, we maintain the whitespace from the deleted/inserted
text and mark it as a deletion/insertion. After an unchanged word or a substitution (i.e., a
deletion and insertion without an intervening unchanged word), we show a whitespace diff iff the
old whitespace did not contain a line break (as rendering a deleted newline still visually
suggests a line break in the new output); otherwise, we use the whitespace from the new version
but mark it as unchanged, since there was also whitespace here originally too. Within a
substitution, we omit whitespace entirely. After an insertion, we show the new whitespace and mark
it as an insertion. After a deletion, we render the old whitespace as a deletion unless it
contains a newline, for the same reason mentioned previously.
-/
wordDiff := Id.run do
let (words, wss) := splitWords s
let (words', wss') := splitWords s'
let diff := Diff.diff words words'
let mut withWs := #[]
let mut (wssIdx, wss'Idx) := (0, 0)
let mut inSubst := false
for h : diffIdx in [:diff.size] do
let (a₁, s₁) := diff[diffIdx]
withWs := withWs.push (a₁, s₁)
if let some (a₂, s₂) := diff[diffIdx + 1]? then
match a₁, a₂ with
| .skip, .delete =>
-- Unchanged word: show whitespace diff unless this is followed by a deleted terminal
-- substring of the old, in which case show the old whitespace (since there is no new)
let ws := wss[wssIdx]!
let wsDiff := if let some ws' := wss'[wss'Idx]? then
mkWhitespaceDiff ws ws'
else
#[(.delete, ws)]
withWs := withWs ++ wsDiff
wssIdx := wssIdx + 1
wss'Idx := wss'Idx + 1
| .skip, .skip | .skip, .insert =>
-- Unchanged word: inverse of the above case: new has whitespace here, and old does too so
-- long as we haven't reached an appended terminal new portion
let ws' := wss'[wss'Idx]!
let wsDiff := if let some ws := wss[wssIdx]? then
mkWhitespaceDiff ws ws'
else
#[(.insert, ws')]
withWs := withWs ++ wsDiff
wssIdx := wssIdx + 1
wss'Idx := wss'Idx + 1
| .insert, .insert =>
-- Insertion separator: include whitespace, and mark it as inserted
let ws := wss'[wss'Idx]!
withWs := withWs.push (.insert, ws)
wss'Idx := wss'Idx + 1
| .insert, .skip =>
-- End of insertion: if this was a substitution, new and old have whitespace here; if it
-- wasn't, only new has whitespace here
let ws' := wss'[wss'Idx]!
let wsDiff := if inSubst then
mkWhitespaceDiff wss[wssIdx]! ws'
else
#[(.insert, ws')]
withWs := withWs ++ wsDiff
wss'Idx := wss'Idx + 1
if inSubst then wssIdx := wssIdx + 1
inSubst := false
| .delete, .delete =>
-- Deletion separator: include and mark as deleted
let ws := wss[wssIdx]!
withWs := withWs.push (.delete, ws)
wssIdx := wssIdx + 1
| .delete, .skip =>
-- End of deletion: include the deletion's whitespace as deleted iff it is not a newline
-- (see earlier note); in principle, we should never have a substitution ending with a
-- deletion (`diff` should prefer `a̵b̲` to `b̲a̵`), but we handle this in case `diff` changes
let ws := wss[wssIdx]!
unless inSubst || ws.contains '\n' do
withWs := withWs.push (.delete, ws)
wssIdx := wssIdx + 1
if inSubst then wss'Idx := wss'Idx + 1
inSubst := false
| .insert, .delete | .delete, .insert =>
-- "Substitution point": don't include any whitespace, since we're switching this word
inSubst := true
withWs
|> joinEdits
|>.map fun (act, ss) => (act, ss.foldl (· ++ ·) "")
charDiff :=
Diff.diff (splitChars s) (splitChars s') |> joinCharDiff
/-- Given a `Char` diff, produces an equivalent `String` diff, joining actions of the same kind. -/
joinCharDiff (d : Array (Diff.Action × Char)) :=
joinEdits d |>.map fun (act, cs) => (act, String.mk cs.toList)
maxDiff :=
#[(.delete, s), (.insert, s')]
mkWhitespaceDiff (oldWs newWs : String) :=
if !oldWs.contains '\n' then
Diff.diff oldWs.data.toArray newWs.data.toArray |> joinCharDiff
else
#[(.skip, newWs)]
splitChars (s : String) : Array Char :=
s.toList.toArray
splitWords (s : String) : Array String × Array String :=
splitWordsAux s 0 0 #[] #[]
splitWordsAux (s : String) (b : String.Pos) (i : String.Pos) (r ws : Array String) : Array String × Array String :=
if h : s.atEnd i then
(r.push (s.extract b i), ws)
else
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (String.lt_next s _)
if (s.get i).isWhitespace then
let skipped := (Substring.mk s i s.endPos).takeWhile (·.isWhitespace)
let i' := skipped.stopPos
splitWordsAux s i' i' (r.push (s.extract b i)) (ws.push (s.extract i i'))
else
splitWordsAux s b (s.next i) r ws
joinEdits {α} (ds : Array (Diff.Action × α)) : Array (Diff.Action × Array α) :=
ds.foldl (init := #[]) fun acc (act, c) =>
if h : acc.isEmpty then
@ -178,7 +321,7 @@ def mkSuggestionsMessage (suggestions : Array Suggestion)
let suggestionText := suggestionArr[0]!.2.1
let map ← getFileMap
let rangeContents := Substring.mk map.source range.start range.stop |>.toString
let edits := readableDiff rangeContents suggestionText
let edits := readableDiff rangeContents suggestionText suggestion.diffGranularity
let diffJson := mkDiffJson edits
let json := json% {
diff: $diffJson,

View file

@ -194,7 +194,7 @@ error: Invalid
Hint: Use one of these instead
• first second thir̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵
• f̵i̵r̵st̵ ̵s̵econd t̵h̵i̵r̵d̵ ̵fourth fifth
• f̵i̵r̵s̵t̵ ̵second t̵h̵i̵r̵d̵ ̵fourth fifth
• f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵f̲i̲ ̲s̲e̲ ̲t̲h̲ ̲f̲o̲ ̲f̲i̲
• f̵i̵r̵s̵t̵ ̵s̵e̵c̵o̵n̵d̵ ̵t̵h̵i̵r̵d̵ ̵f̵o̵u̵r̵t̵h̵ ̵f̵i̵f̵t̵h̵s̲e̲c̲o̲n̲d̲
-/
@ -211,7 +211,7 @@ error: Invalid
Hint: Try one of these
• let x := 4̵2̵
̵3̲1̲;̲ ̵2̵ ̵*̵ ̵x
̵ ̵ ̵2̵ ̵*̵3̲1̲;̲ x
• l̵e̵t̵ ̵x̵ ̵:̵=̵ ̵4̵2̵
̵ ̵ ̵2̵ ̵*̵ ̵x̵l̲e̲t̲ ̲(̲a̲,̲ ̲b̲,̲ ̲c̲)̲ ̲:̲=̲ ̲(̲1̲,̲ ̲2̲,̲ ̲3̲)̲
̲ ̲ ̲b̲
@ -222,3 +222,55 @@ Hint: Try one of these
#check suggest_replacement%
let x := 42
2 * x
/-! ## Forced diff granularities -/
namespace DiffGranularity
scoped syntax "select " "granularity " term : command
open Elab in
elab_rules : command
| `(command| select granularity $t) => do
let tp := mkConst ``DiffGranularity
let g ← unsafe Command.runTermElabM (fun _ =>
Term.elabTerm t tp >>= monadLift ∘ evalExpr DiffGranularity tp)
let hint ← Command.liftCoreM <| MessageData.hint m!"Hint" #[{
suggestion :=
if g matches .all then
-- Ensure we wouldn't have used `.all` by default
"selected granularity .all"
else
"many granularity words not matching the source"
diffGranularity := g
}]
logInfo hint
/--
info:
Hint: Hint
s̵e̵m̲a̲n̲y̲ ̲g̲r̲a̲n̲u̲le̵c̵a̲r̲i̲ty̲ w̲o̲r̲d̲s̲ ̲n̲o̲t̲ ̲m̲a̲t̲c̲h̲i̲n̲gr̵a̵n̵u̵l̵a̵r̵i̵ ̲ty̵h̲e̲ .̵c̵h̵a̵s̲o̲u̲rc̲e̲
-/
#guard_msgs (whitespace := exact) in
select granularity .char
/--
info:
Hint: Hint
s̵e̵l̵e̵c̵t̵m̲a̲n̲y̲ granularity .̵w̵o̵r̵d̵w̲o̲r̲d̲s̲ ̲n̲o̲t̲ ̲m̲a̲t̲c̲h̲i̲n̲g̲ ̲t̲h̲e̲ ̲s̲o̲u̲r̲c̲e̲
-/
#guard_msgs (whitespace := exact) in
select granularity .word
/--
info:
Hint: Hint
s̵e̵l̵e̵c̵t̵ ̵g̵r̵a̵n̵u̵l̵a̵r̵i̵t̵y̵ ̵.̵a̵l̵l̵s̲e̲l̲e̲c̲t̲e̲d̲ ̲g̲r̲a̲n̲u̲l̲a̲r̲i̲t̲y̲ ̲.̲a̲l̲l̲
-/
#guard_msgs (whitespace := exact) in
select granularity .all
end DiffGranularity

View file

@ -0,0 +1,102 @@
import Lean.Meta.Hint
/-! # Word-level diffs in hint suggestions
Word-level diffs in hint suggestions require careful display of whitespace, since it is frequently
impossible to both accurately render both deleted and inserted whitespace while also displaying a
snippet that recognizably maps to what will be inserted. These tests exercise the behavior of the
word-level diff generator to ensure that it is reasonable.
-/
open Lean Meta Hint
-- We reproduce this function here because it is private in `Lean.Meta.Hint`
private def mkDiffString (ds : Array (Diff.Action × String)) : String :=
let rangeStrs := ds.map fun
| (.insert, s) => String.mk (s.data.flatMap ([·, '\u0332'])) -- U+0332 Combining Low Line
| (.delete, s) => String.mk (s.data.flatMap ([·, '\u0335'])) -- U+0335 Combining Short Stroke Overlay
| (.skip , s) => s
rangeStrs.foldl (· ++ ·) ""
/-! ## Test Word Diff Behavior -/
/-- info: "one two ̲t̲h̲r̲e̲e̲" -/
#guard_msgs in
#eval readableDiff.wordDiff "one two" "one two three" |> mkDiffString
/-- info: "o̵n̵e̵a̲ two ̲t̲h̲r̲e̲e̲" -/
#guard_msgs in
#eval readableDiff.wordDiff "one\ntwo" "a two three" |> mkDiffString
/-- info: "o̵n̵e̵a̲ ̵\n̲two ̲t̲h̲r̲e̲e̲" -/
#guard_msgs in
#eval readableDiff.wordDiff "one two" "a\ntwo three" |> mkDiffString
/-- info: "one two" -/
#guard_msgs in
#eval readableDiff.wordDiff "one\ntwo" "one two" |> mkDiffString
/-- info: "one t̵w̵o̵ ̵three" -/
#guard_msgs in
#eval readableDiff.wordDiff "one two three" "one three" |> mkDiffString
/-- info: "a ̵b" -/
#guard_msgs in
#eval readableDiff.wordDiff "a b" "a b" |> mkDiffString
/-- info: "a ̵b̵" -/
#guard_msgs in
#eval readableDiff.wordDiff "a b" "a" |> mkDiffString
/-- info: "a̵b c" -/
#guard_msgs in
#eval readableDiff.wordDiff "a\nb c" "b c" |> mkDiffString
/--
info: "a l̵o̵n̵g̵e̵r̵l̲o̲n̲g̲\tstring w̵i̵t̵h̵ ̵ ̵ ̵w̵h̵i̵t̵e̵s̵p̵a̵c̵e̵in strange ̵\n̲\n̲p̵l̵a̵c̵e̵s̵\n̵a̵n̵d̵ ̵u̵n̵u̵s̵u̵a̵l̵spaces"
-/
#guard_msgs in
#eval readableDiff.wordDiff
"a longer\nstring with whitespace\n in strange\nplaces and\n\nunusual\tspaces"
"a long\tstring in strange\n\nspaces"
|> mkDiffString
/--
info: def f (̵x̵ ̵:̵ ̵N̵a̵t̵)̵x̲ := x + 1
#check let c̵x̲ := 3̵1̵5̲ in
f c̵x̲
-/
#guard_msgs in
#eval IO.println <| readableDiff.wordDiff
"def f (x : Nat) := x + 1\n#check let c := 31 in\n f c"
"def f x := x + 1\n#check let x := 5 in\n f x"
|> mkDiffString
/-! ## Test Word Granularity Selection -/
/-- info: "simp [mergeTR.go, m̵e̵r̵g̵e̵,̵ ̵reverseAux_eq]" -/
#guard_msgs in
#eval readableDiff
"simp [mergeTR.go, merge, reverseAux_eq]" "simp [mergeTR.go, reverseAux_eq]" |> mkDiffString
/--
info: "r̵w̵s̲i̲m̲p̲ only [̵m̵e̵r̵g̵e̵T̵R̵.̵g̵o̵]̵[̲m̲e̲r̲g̲e̲T̲R̲.̲g̲o̲,̲ ̲m̲e̲r̲g̲e̲,̲ ̲r̲e̲v̲e̲r̲s̲e̲A̲u̲x̲_̲e̲q̲]̲"
-/
#guard_msgs in
#eval readableDiff
"rw only [mergeTR.go]"
"simp only [mergeTR.go, merge, reverseAux_eq]" |> mkDiffString
/-! ## Test Whitespace-Diff Handling -/
/-- info: " ̵rw ̵ ̵ ̵only [h]" -/
#guard_msgs in
#eval readableDiff.wordDiff " rw only [h]" " rw only [h]" |> mkDiffString
/-- info: " ̲ ̲w1 w2 ̵ ̵ ̵\t̲w3 ̲ ̲ ̲ ̲ ̲ ̲ ̲w̲4̲" -/
#guard_msgs in
#eval readableDiff.wordDiff "w1\nw2 w3" " w1 w2\tw3 w4" |> mkDiffString
/-- info: " ̵ ̵x ̲ ̲ ̲ ̲ ̲y̵ ̵ ̵ ̵z ̵" -/
#guard_msgs in
#eval readableDiff.wordDiff " x y z " "x z" |> mkDiffString

View file

@ -72,7 +72,7 @@ warning: This simp argument is unused:
(some_def.eq_def)
Hint: Omit it from the simp argument list.
simp [some_def, (̵some_def.eq_def)̵,̵ ̵s̵o̵m̵e̵_̵d̵e̵f̵.̵e̵q̵_̵d̵e̵f̵]
simp [some_def, (̵s̵o̵m̵e̵_̵d̵e̵f̵.̵e̵q̵_̵d̵e̵f̵)̵,̵ ̵some_def.eq_def]
note: this linter can be disabled with `set_option linter.unusedSimpArgs false`
---
warning: This simp argument is unused: