diff --git a/doc/examples/IJCAR2026/README.md b/doc/examples/IJCAR2026/README.md new file mode 100644 index 0000000000..441da94d27 --- /dev/null +++ b/doc/examples/IJCAR2026/README.md @@ -0,0 +1,6 @@ +# IJCAR 2026: `grind`, An SMT-Inspired Tactic for Lean 4 + +Ancillary materials for the paper. + +- `examples.lean`: interactive examples from the paper +- `analyze_grind_loc.py`: script used for the evaluation section, analyzing `grind` adoption and lines-of-code changes in Mathlib diff --git a/doc/examples/IJCAR2026/analyze_grind_loc.py b/doc/examples/IJCAR2026/analyze_grind_loc.py new file mode 100755 index 0000000000..49fd4149fa --- /dev/null +++ b/doc/examples/IJCAR2026/analyze_grind_loc.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +""" +Analyze grind adoption LoC changes in mathlib. + +For each theorem/lemma in master that uses grind, find the most recent +commit where it didn't use grind, and measure the LoC change. + +This script was used in preparing the "Evaluation" section of the grind paper. +""" + +import subprocess +import re +import csv +import sys +from pathlib import Path +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed +from typing import Iterator +from functools import lru_cache + + +@dataclass +class GrindUsage: + file: str + line_no: int + decl_name: str + decl_type: str # theorem, lemma, def, example, etc. + + +@dataclass +class LocChange: + file: str + decl_name: str + decl_type: str + old_loc: int + new_loc: int + loc_saved: int + commit_sha: str + commit_date: str + + +def run_git(args: list[str], repo: str = ".") -> str: + """Run a git command and return stdout.""" + result = subprocess.run( + ["git", "-C", repo] + args, + capture_output=True, text=True, check=True + ) + return result.stdout + + +def run_git_safe(args: list[str], repo: str = ".") -> str | None: + """Run a git command, return None on failure.""" + result = subprocess.run( + ["git", "-C", repo] + args, + capture_output=True, text=True + ) + if result.returncode != 0: + return None + return result.stdout + + +@lru_cache(maxsize=4096) +def get_file_at_commit(repo: str, commit: str, file_path: str) -> str | None: + """Get file contents at a specific commit (cached).""" + return run_git_safe(["show", f"{commit}:{file_path}"], repo) + + +def find_grind_usages(repo: str = ".") -> tuple[list[GrindUsage], int, int]: + """Find all declarations using grind in current master. + Returns (usages, total_grind_calls, grind_in_decls) where: + - total_grind_calls is the count of grind tactic calls (after filtering comments/attrs) + - grind_in_decls is the count of those that are inside named declarations + """ + # Use git grep to find lines containing 'grind' (excludes lake packages) + result = run_git(["grep", "-n", "grind", "master", "--", "Mathlib/"], repo) + + usages = [] + seen = set() # (file, decl_name) to dedupe + total_grind_calls = 0 + grind_in_decls = 0 + + for line in result.strip().split('\n'): + if not line: + continue + # Format: master:path/to/file.lean:123:line content + match = re.match(r'^master:(.+\.lean):(\d+):(.*)$', line) + if not match: + continue + + file_path, line_no_str, content = match.groups() + line_no = int(line_no_str) + + # Skip comments and attributes (not tactic calls) + content_stripped = content.strip() + if content_stripped.startswith('--') or content_stripped.startswith('/-'): + continue + if content_stripped.startswith('attribute'): + continue + if '@[' in content and 'grind' in content: + # Could be an attribute like @[grind =], skip + if 'by' not in content and ':=' not in content: + continue + + total_grind_calls += 1 + + # Find the declaration this grind belongs to + decl_name, decl_type = find_decl_at_line(repo, file_path, line_no) + if decl_name is None: + continue + + grind_in_decls += 1 + + key = (file_path, decl_name) + if key in seen: + continue + seen.add(key) + + usages.append(GrindUsage( + file=file_path, + line_no=line_no, + decl_name=decl_name, + decl_type=decl_type + )) + + return usages, total_grind_calls, grind_in_decls + + +def find_decl_at_line(repo: str, file_path: str, grind_line: int) -> tuple[str | None, str | None]: + """ + Find the declaration name and type that contains the grind at the given line. + Search backwards from grind_line to find the most recent declaration. + """ + # Get file content at master + content = get_file_at_commit(repo, "master", file_path) + if content is None: + return None, None + + lines = content.split('\n') + + # Search backwards from grind_line for a declaration + # Match declarations with optional leading modifiers and attributes + decl_pattern = re.compile(r'^(?:@\[.*?\]\s*)*(?:private\s+|protected\s+|noncomputable\s+|scoped\s+)*(theorem|lemma|def|example|instance|abbrev|structure|class)\s+(\w+)') + + for i in range(grind_line - 1, -1, -1): + if i >= len(lines): + continue + line = lines[i] + match = decl_pattern.match(line) + if match: + return match.group(2), match.group(1) + + return None, None + + +def find_grind_introduction_commit(repo: str, file_path: str, decl_name: str) -> str | None: + """ + Find the commit that introduced grind to this declaration. + Returns None if the declaration was born with grind. + """ + # First, find the line range of the declaration in master + content = get_file_at_commit(repo, "master", file_path) + if content is None: + return None + + lines = content.split('\n') + decl_start = None + decl_end = None + + # Find declaration start + decl_pattern = re.compile(rf'^(?:@\[.*?\]\s*)*(?:private\s+|protected\s+|noncomputable\s+|scoped\s+)*(theorem|lemma|def|example|instance|abbrev|structure|class)\s+{re.escape(decl_name)}\b') + for i, line in enumerate(lines): + if decl_pattern.match(line): + decl_start = i + break + + if decl_start is None: + return None + + # Find declaration end (next top-level declaration or EOF) + end_patterns = re.compile(r'^(?:private\s+|protected\s+|noncomputable\s+|scoped\s+)*(theorem|lemma|def|example|instance|abbrev|structure|class|namespace|section|end\s|@\[|#|/-)') + for i in range(decl_start + 1, len(lines)): + line = lines[i] + if line and not line[0].isspace() and end_patterns.match(line): + decl_end = i + break + if decl_end is None: + decl_end = len(lines) + + # Find grind line within declaration + grind_line = None + for i in range(decl_start, decl_end): + if 'grind' in lines[i]: + grind_line = i + 1 # 1-indexed + break + + if grind_line is None: + return None + + # Use git blame to find when that grind line was added + blame_result = run_git_safe(["blame", "-L", f"{grind_line},{grind_line}", "--porcelain", "master", "--", file_path], repo) + if blame_result is None: + return None + + # First line of porcelain output is the commit SHA + first_line = blame_result.split('\n')[0] + commit_sha = first_line.split()[0] + + # Check if this declaration existed before this commit (without grind) + parent_sha = run_git_safe(["rev-parse", f"{commit_sha}^"], repo) + if parent_sha is None: + return None # Initial commit, born with grind + parent_sha = parent_sha.strip() + + # Check if declaration existed in parent + parent_content = get_file_at_commit(repo, parent_sha, file_path) + if parent_content is None: + # File didn't exist in parent - might be new file or renamed + return None + + # Check if declaration existed and didn't have grind + if decl_name not in parent_content: + return None # Declaration didn't exist - born with grind + + # Check if it already had grind in parent + parent_lines = parent_content.split('\n') + in_decl = False + for line in parent_lines: + if decl_pattern.match(line): + in_decl = True + elif in_decl: + if line and not line[0].isspace() and end_patterns.match(line): + break + if 'grind' in line: + # Already had grind in parent — not the introduction commit + return None + + return commit_sha + + +def extract_proof_loc(repo: str, file_path: str, decl_name: str, commit: str) -> int | None: + """ + Extract the number of lines in a declaration's proof at a given commit. + Returns None if the declaration doesn't exist at that commit. + """ + content = get_file_at_commit(repo, commit, file_path) + if content is None: + return None + + lines = content.split('\n') + + # Find declaration start + decl_pattern = re.compile(rf'^(?:@\[.*?\]\s*)*(?:private\s+|protected\s+|noncomputable\s+|scoped\s+)*(theorem|lemma|def|example|instance|abbrev|structure|class)\s+{re.escape(decl_name)}\b') + decl_start = None + for i, line in enumerate(lines): + if decl_pattern.match(line): + decl_start = i + break + + if decl_start is None: + return None + + # Find declaration end + end_patterns = re.compile(r'^(?:private\s+|protected\s+|noncomputable\s+|scoped\s+)*(theorem|lemma|def|example|instance|abbrev|structure|class|namespace|section|end\s|@\[|#|/-)') + decl_end = None + for i in range(decl_start + 1, len(lines)): + line = lines[i] + if line and not line[0].isspace() and end_patterns.match(line): + decl_end = i + break + if decl_end is None: + decl_end = len(lines) + + # Count non-empty lines in declaration + loc = sum(1 for i in range(decl_start, decl_end) if lines[i].strip()) + return loc + + +def get_commit_date(repo: str, sha: str) -> str: + """Get the date of a commit.""" + result = run_git(["log", "-1", "--format=%ci", sha], repo) + return result.strip().split()[0] # Just the date part + + +def analyze_usage_detailed(repo: str, usage: GrindUsage) -> tuple[LocChange | None, str]: + """Analyze a single grind usage, returning (result, skip_reason).""" + commit = find_grind_introduction_commit(repo, usage.file, usage.decl_name) + if commit is None: + return None, "born_with_grind" + + parent = run_git_safe(["rev-parse", f"{commit}^"], repo) + if parent is None: + return None, "no_parent" + parent = parent.strip() + + old_loc = extract_proof_loc(repo, usage.file, usage.decl_name, parent) + new_loc = extract_proof_loc(repo, usage.file, usage.decl_name, "master") + + if old_loc is None: + return None, "old_loc_failed" + if new_loc is None: + return None, "new_loc_failed" + + commit_date = get_commit_date(repo, commit) + + return LocChange( + file=usage.file, + decl_name=usage.decl_name, + decl_type=usage.decl_type, + old_loc=old_loc, + new_loc=new_loc, + loc_saved=old_loc - new_loc, + commit_sha=commit[:12], + commit_date=commit_date + ), "success" + + +def main(repo: str = "."): + print("Finding grind usages in master...", file=sys.stderr) + usages, total_grind_calls, grind_in_decls = find_grind_usages(repo) + print(f"Found {len(usages)} declarations using grind ({grind_in_decls}/{total_grind_calls} grind calls)", file=sys.stderr) + + print("Analyzing git history (this may take a while)...", file=sys.stderr) + results: list[LocChange] = [] + skip_reasons: dict[str, int] = {} + + with ThreadPoolExecutor(max_workers=64) as executor: + futures = {executor.submit(analyze_usage_detailed, repo, usage): usage for usage in usages} + + for i, future in enumerate(as_completed(futures)): + if (i + 1) % 50 == 0: + print(f" Progress: {i + 1}/{len(usages)}", file=sys.stderr, flush=True) + + result, reason = future.result() + if result: + results.append(result) + else: + skip_reasons[reason] = skip_reasons.get(reason, 0) + 1 + + total_skipped = sum(skip_reasons.values()) + print(f"\nAnalyzed {len(results)} declarations, skipped {total_skipped}:", file=sys.stderr) + for reason, count in sorted(skip_reasons.items(), key=lambda x: -x[1]): + print(f" - {reason}: {count}", file=sys.stderr) + + # Sort by LoC saved (descending) + results.sort(key=lambda r: r.loc_saved, reverse=True) + + # Output CSV + writer = csv.writer(sys.stdout) + writer.writerow(["file", "declaration", "type", "old_loc", "new_loc", "loc_saved", "commit", "date"]) + for r in results: + writer.writerow([r.file, r.decl_name, r.decl_type, r.old_loc, r.new_loc, r.loc_saved, r.commit_sha, r.commit_date]) + + # Summary stats to stderr + total_old = sum(r.old_loc for r in results) if results else 0 + total_new = sum(r.new_loc for r in results) if results else 0 + total_saved = sum(r.loc_saved for r in results) if results else 0 + avg_saved = total_saved / len(results) if results else 0 + + print("\n" + "=" * 60, file=sys.stderr) + print("GRIND ADOPTION LOC ANALYSIS", file=sys.stderr) + print("=" * 60, file=sys.stderr) + + print("\n## Declaration Counts\n", file=sys.stderr) + print(f" Total grind tactic calls: {total_grind_calls}", file=sys.stderr) + print(f" In named declarations: {grind_in_decls} ({total_grind_calls - grind_in_decls} in anonymous/other)", file=sys.stderr) + print(f" Unique declarations: {len(usages)}", file=sys.stderr) + print(f" Converted to grind: {len(results)}", file=sys.stderr) + print(f" Born with grind: {skip_reasons.get('born_with_grind', 0)}", file=sys.stderr) + if skip_reasons.get('old_loc_failed', 0) > 0: + print(f" Could not trace history: {skip_reasons.get('old_loc_failed', 0)}", file=sys.stderr) + + print("\n## Lines of Code Impact\n", file=sys.stderr) + print(f" Total LoC before grind: {total_old}", file=sys.stderr) + print(f" Total LoC after grind: {total_new}", file=sys.stderr) + print(f" Total LoC saved: {total_saved}", file=sys.stderr) + print(f" Average LoC saved per theorem: {avg_saved:.1f}", file=sys.stderr) + big_savings = sum(1 for r in results if r.loc_saved >= 10) + print(f" Declarations shrunk by 10+ lines: {big_savings}", file=sys.stderr) + + if results: + print("\n## Top 10 Biggest LoC Savings\n", file=sys.stderr) + for r in results[:10]: + print(f" {r.loc_saved:+4d} lines: {r.decl_name} ({r.file})", file=sys.stderr) + + # Show any that got bigger (negative savings) + got_bigger = [r for r in results if r.loc_saved < 0] + if got_bigger: + print(f"\n## Declarations That Got Bigger ({len(got_bigger)} total)\n", file=sys.stderr) + print(" (showing 5 worst):", file=sys.stderr) + for r in got_bigger[-5:]: # Show worst 5 + print(f" {r.loc_saved:+4d} lines: {r.decl_name} ({r.file})", file=sys.stderr) + + print("\n" + "=" * 60, file=sys.stderr) + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description="Analyze grind LoC savings") + parser.add_argument("--repo", "-r", default=".", help="Repository path") + args = parser.parse_args() + main(args.repo) diff --git a/doc/examples/IJCAR2026/examples.lean b/doc/examples/IJCAR2026/examples.lean new file mode 100644 index 0000000000..619803b52e --- /dev/null +++ b/doc/examples/IJCAR2026/examples.lean @@ -0,0 +1,127 @@ +/- Examples from the paper "grind: An SMT-Inspired Tactic for Lean 4" -/ +open Lean Grind + +/- Congruence closure. -/ + +example (f : Nat → Nat) (h : a = b) : f (f b) = f (f a) := by grind + +/- +E-matching. + +Any `f` that is the left inverse of `g` would work on this example. +-/ +def f (x : Nat) := x - 1 +def g (x : Nat) := x + 1 + +@[grind =] theorem fg : f (g x) = x := by simp [f, g] +example : f a = b → a = g c → b = c := by grind + +/- +Any `R` that is transitive and symmetric would work on this example. +-/ +def R : Nat → Nat → Prop := (· % 7 = · % 7) +@[grind →] theorem Rtrans : R x y → R y z → R x z := by grind [R] +@[grind →] theorem Rsymm : R x y → R y x := by grind [R] +example : R a b → R c b → R d c → R a d := by grind + +/- Big step operational semantics example. -/ + +abbrev Variable := String +def State := Variable → Nat +inductive Stmt : Type where + | skip : Stmt + | assign : Variable → (State → Nat) → Stmt + | seq : Stmt → Stmt → Stmt + | ifThenElse : (State → Prop) → Stmt → Stmt → Stmt + | whileDo : (State → Prop) → Stmt → Stmt +infix:60 ";; " => Stmt.seq +export Stmt (skip assign seq ifThenElse whileDo) +set_option quotPrecheck false in +notation s:70 "[" x:70 "↦" n:70 "]" => (fun v ↦ if v = x then n else s v) +inductive BigStep : Stmt → State → State → Prop where + | skip (s : State) : BigStep skip s s + | assign (x : Variable) (a : State → Nat) (s : State) : BigStep (assign x a) s (s[x ↦ a s]) + | seq {S T : Stmt} {s t u : State} (hS : BigStep S s t) (hT : BigStep T t u) : + BigStep (S;; T) s u + | if_true {B : State → Prop} {s t : State} (hcond : B s) (S T : Stmt) (hbody : BigStep S s t) : + BigStep (ifThenElse B S T) s t + | if_false {B : State → Prop} {s t : State} (hcond : ¬ B s) (S T : Stmt) (hbody : BigStep T s t) : + BigStep (ifThenElse B S T) s t + | while_true {B S s t u} (hcond : B s) (hbody : BigStep S s t) (hrest : BigStep (whileDo B S) t u) : + BigStep (whileDo B S) s u + | while_false {B S s} (hcond : ¬ B s) : BigStep (whileDo B S) s s +notation:55 "(" S:55 "," s:55 ")" " ==> " t:55 => BigStep S s t + +example {B S T s t} (hcond : B s) : (ifThenElse B S T, s) ==> t → (S, s) ==> t := by + grind [cases BigStep] + +theorem cases_if_of_true {B S T s t} (hcond : B s) : (ifThenElse B S T, s) ==> t → (S, s) ==> t := by + grind [cases BigStep] + +theorem cases_if_of_false {B S T s t} (hcond : ¬ B s) : (ifThenElse B S T, s) ==> t → (T, s) ==> t := by + grind [cases BigStep] + +example {B S T s t} : (ifThenElse B S T, s) ==> t ↔ (B s ∧ (S, s) ==> t) ∨ (¬ B s ∧ (T, s) ==> t) := by + grind [BigStep] -- shortcut for `cases BigStep` and `intro BigStep` + +attribute [grind] BigStep +theorem if_iff {B S T s t} : (ifThenElse B S T, s) ==> + t ↔ (B s ∧ (S, s) ==> t) ∨ (¬ B s ∧ (T, s) ==> t) := by grind + +/- Dependent pattern matching. -/ + +inductive Vec (α : Type u) : Nat → Type u + | nil : Vec α 0 + | cons : α → Vec α n → Vec α (n+1) +@[grind =] def Vec.head : Vec α (n+1) → α + | .cons a _ => a +example (as bs : Vec Int (n+1)) : as.head = bs.head + → (match as, bs with + | .cons a _, .cons b _ => a + b) = 2 * as.head := by grind + +/- Theory solvers. -/ + +example [CommRing α] (a b c : α) : + a + b + c = 3 → + a^2 + b^2 + c^2 = 5 → + a^3 + b^3 + c^3 = 7 → + a^4 + b^4 + c^4 = 9 := by grind + +example (x : BitVec 8) : (x - 16) * (x + 16) = x^2 := by grind + +example [CommSemiring α] [AddRightCancel α] (x y : α) : + x^2*y = 1 → x*y^2 = y → y*x = 1 := by grind + +example (a b : UInt32) : a ≤ 2 → b ≤ 3 → a + b ≤ 5 := by grind + +example [LE α] [Std.IsLinearPreorder α] (a b c d : α) : + a ≤ b → ¬ (c ≤ b) → ¬ (d ≤ c) → a ≤ d := by grind + +/- Theory combination. -/ + +example [CommRing α] [NoNatZeroDivisors α] + (a b c : α) (f : α → Nat) : + a + b + c = 3 → a^2 + b^2 + c^2 = 5 → a^3 + b^3 + c^3 = 7 → + f (a^4 + b^4) + f (9 - c^4) ≠ 1 := by grind + +/- Interactive mode. -/ + +-- Remark: Mathlib contains the definition of `Real`, `sin`, and `cos`. +axiom Real : Type +instance : Lean.Grind.CommRing Real := sorry + +axiom cos : Real → Real +axiom sin : Real → Real +axiom trig_identity : ∀ x, (cos x)^2 + (sin x)^2 = 1 + +-- Manually specify the patterns for `trig_identity` +grind_pattern trig_identity => cos x +grind_pattern trig_identity => sin x + +example : (cos x + sin x)^2 = 2 * cos x * sin x + 1 := by + grind? -- Provides code action + +example : (cos x + sin x)^2 = 2 * cos x * sin x + 1 := by + grind => + instantiate only [trig_identity] + ring