Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP/RFC: Add detailed reporting for debugging generation #6

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Plausible/Tactic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ elab_rules : tactic | `(tactic| plausible $[$cfg]?) => withMainContext do
traceSuccesses := cfg.traceSuccesses || (← isTracingEnabledFor `plausible.success),
traceShrink := cfg.traceShrink || (← isTracingEnabledFor `plausible.shrink.steps),
traceShrinkCandidates := cfg.traceShrinkCandidates
|| (← isTracingEnabledFor `plausible.shrink.candidates) }
|| (← isTracingEnabledFor `plausible.shrink.candidates),
detailedReportingWithName := cfg.detailedReportingWithName }
let inst ← try
synthInstance (← mkAppM ``Testable #[tgt'])
catch _ => throwError "\
Expand Down
114 changes: 87 additions & 27 deletions Plausible/Testable.lean
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ inductive TestResult (p : Prop) where
where `p` holds. With a proof, the one test was sufficient to
prove that `p` holds and we do not need to keep finding examples.
-/
| success : Unit ⊕' p → TestResult p
| success : Unit ⊕' p → List String → TestResult p
/--
Give up when a well-formed example cannot be generated.
`gaveUp n` tells us that `n` invalid examples were tried.
Expand Down Expand Up @@ -125,6 +125,10 @@ structure Configuration where
-/
traceShrinkCandidates : Bool := false
/--
Enable detailed reporting with Tyche.
-/
detailedReportingWithName : Option String := none
/--
Hard code the seed to use for the RNG
-/
randomSeed : Option Nat := none
Expand All @@ -137,9 +141,10 @@ structure Configuration where
open Lean in
instance : ToExpr Configuration where
toTypeExpr := mkConst `Configuration
toExpr cfg := mkApp9 (mkConst ``Configuration.mk)
toExpr cfg := mkApp10 (mkConst ``Configuration.mk)
(toExpr cfg.numInst) (toExpr cfg.maxSize) (toExpr cfg.numRetries) (toExpr cfg.traceDiscarded)
(toExpr cfg.traceSuccesses) (toExpr cfg.traceShrink) (toExpr cfg.traceShrinkCandidates)
(toExpr cfg.detailedReportingWithName)
(toExpr cfg.randomSeed) (toExpr cfg.quiet)

/--
Expand Down Expand Up @@ -171,8 +176,8 @@ def NamedBinder (_n : String) (p : Prop) : Prop := p
namespace TestResult

def toString : TestResult p → String
| success (PSum.inl _) => "success (no proof)"
| success (PSum.inr _) => "success (proof)"
| success (PSum.inl _) _ => "success (no proof)"
| success (PSum.inr _) _ => "success (proof)"
| gaveUp n => s!"gave {n} times"
| failure _ counters _ => s!"failed {counters}"

Expand All @@ -187,7 +192,7 @@ def combine {p q : Prop} : Unit ⊕' (p → q) → Unit ⊕' p → Unit ⊕' q
def and : TestResult p → TestResult q → TestResult (p ∧ q)
| failure h xs n, _ => failure (fun h2 => h h2.left) xs n
| _, failure h xs n => failure (fun h2 => h h2.right) xs n
| success h1, success h2 => success <| combine (combine (PSum.inr And.intro) h1) h2
| success h1 xs, success h2 ys => success (combine (combine (PSum.inr And.intro) h1) h2) (xs ++ ys)
| gaveUp n, gaveUp m => gaveUp <| n + m
| gaveUp n, _ => gaveUp n
| _, gaveUp n => gaveUp n
Expand All @@ -200,8 +205,8 @@ def or : TestResult p → TestResult q → TestResult (p ∨ q)
| Or.inl h3 => h1 h3
| Or.inr h3 => h2 h3
failure h3 (xs ++ ys) (n + m)
| success h, _ => success <| combine (PSum.inr Or.inl) h
| _, success h => success <| combine (PSum.inr Or.inr) h
| success h xs, _ => success (combine (PSum.inr Or.inl) h) xs
| _, success h xs => success (combine (PSum.inr Or.inr) h) xs
| gaveUp n, gaveUp m => gaveUp <| n + m
| gaveUp n, _ => gaveUp n
| _, gaveUp n => gaveUp n
Expand All @@ -212,7 +217,7 @@ def imp (h : q → p) (r : TestResult p)
(p : Unit ⊕' (p → q) := PSum.inl ()) : TestResult q :=
match r with
| failure h2 xs n => failure (mt h h2) xs n
| success h2 => success <| combine p h2
| success h2 xs => success (combine p h2) xs
| gaveUp n => gaveUp n

/-- Test `q` by testing `p` and proving the equivalence between the two. -/
Expand All @@ -224,10 +229,10 @@ we record that value using this function so that our counter-examples
can be informative. -/
def addInfo (x : String) (h : q → p) (r : TestResult p)
(p : Unit ⊕' (p → q) := PSum.inl ()) : TestResult q :=
if let failure h2 xs n := r then
failure (mt h h2) (x :: xs) n
else
imp h r p
match r with
| failure h2 xs n => failure (mt h h2) (x :: xs) n
| success h2 xs => imp h (success h2 (x :: xs)) p
| _ => imp h r p

/-- Add some formatting to the information recorded by `addInfo`. -/
def addVarInfo {γ : Type _} [Repr γ] (var : String) (x : γ) (h : q → p) (r : TestResult p)
Expand Down Expand Up @@ -273,8 +278,8 @@ instance orTestable [Testable p] [Testable q] : Testable (p ∨ q) where
-- As a little performance optimization we can just not run the second
-- test if the first succeeds
match xp with
| success (PSum.inl h) => return success (PSum.inl h)
| success (PSum.inr h) => return success (PSum.inr <| Or.inl h)
| success (PSum.inl h) xs => return success (PSum.inl h) xs
| success (PSum.inr h) xs => return success (PSum.inr <| Or.inl h) xs
| _ =>
let xq ← runProp q cfg min
return or xp xq
Expand Down Expand Up @@ -440,7 +445,7 @@ instance (priority := low) decidableTestable {p : Prop} [PrintableProp p] [Decid
Testable p where
run := fun _ _ =>
if h : p then
return success (PSum.inr h)
return success (PSum.inr h) []
else
let s := printProp p
return failure h [s!"issue: {s} does not hold"] 0
Expand Down Expand Up @@ -500,38 +505,52 @@ def retry (cmd : Rand (TestResult p)) : Nat → Rand (TestResult p)
| n+1 => do
let r ← cmd
match r with
| .success hp => return success hp
| .success hp xs => return success hp xs
| .failure h xs n => return failure h xs n
| .gaveUp _ => retry cmd n

/-- Count the number of times the test procedure gave up. -/
def giveUp (x : Nat) : TestResult p → TestResult p
| success (PSum.inl ()) => gaveUp x
| success (PSum.inr p) => success <| (PSum.inr p)
| success (PSum.inl ()) _ => gaveUp x
| success (PSum.inr p) xs => success (PSum.inr p) xs
| gaveUp n => gaveUp <| n + x
| TestResult.failure h xs n => failure h xs n

structure RunResult (p : Prop) where
/--
The running test result.
-/
testResult : TestResult p
/--
A history of all previous test results.
-/
history : List (TestResult p)
deriving Inhabited

/-- Try `n` times to find a counter-example for `p`. -/
def Testable.runSuiteAux (p : Prop) [Testable p] (cfg : Configuration) :
TestResult p → Nat → Rand (TestResult p)
RunResult p → Nat → Rand (RunResult p)
| r, 0 => return r
| r, n+1 => do
let size := (cfg.numInst - n - 1) * cfg.maxSize / cfg.numInst
if cfg.traceSuccesses then
slimTrace s!"New sample"
slimTrace s!"Retrying up to {cfg.numRetries} times until guards hold"
let x ← retry (ReaderT.run (Testable.runProp p cfg true) ⟨size⟩) cfg.numRetries
let r' := if Option.isSome cfg.detailedReportingWithName
then {testResult := x, history := r.testResult :: r.history}
else {testResult := x, history := []}
match x with
| success (PSum.inl ()) => runSuiteAux p cfg r n
| gaveUp g => runSuiteAux p cfg (giveUp g r) n
| _ => return x
| success (PSum.inl ()) _ => runSuiteAux p cfg r' n
| gaveUp g => runSuiteAux p cfg {r' with testResult := giveUp g r.testResult} n
| _ => return r'

/-- Try to find a counter-example of `p`. -/
def Testable.runSuite (p : Prop) [Testable p] (cfg : Configuration := {}) : Rand (TestResult p) :=
Testable.runSuiteAux p cfg (success <| PSum.inl ()) cfg.numInst
def Testable.runSuite (p : Prop) [Testable p] (cfg : Configuration := {}) : Rand (RunResult p) :=
Testable.runSuiteAux p cfg {testResult := success (PSum.inl ()) [], history := []} cfg.numInst

/-- Run a test suite for `p` in `BaseIO` using the global RNG in `stdGenRef`. -/
def Testable.checkIO (p : Prop) [Testable p] (cfg : Configuration := {}) : BaseIO (TestResult p) :=
def Testable.checkIO (p : Prop) [Testable p] (cfg : Configuration := {}) : BaseIO (RunResult p) :=
letI : MonadLift Id BaseIO := ⟨fun f => return Id.run f⟩
match cfg.randomSeed with
| none => runRand (Testable.runSuite p cfg)
Expand Down Expand Up @@ -584,12 +603,53 @@ scoped elab "mk_decorations" : tactic => do

end Decorations

open Lean.Json in
def writeReport (thmName : String) (xs : List (TestResult p)) : Lean.CoreM PUnit := do
let now ← IO.monoMsNow
let transformLine x :=
let status := match x with | .success _ _ => "passed" | .failure _ _ _ => "failed" | .gaveUp _ => "gave_up"
let representation :=
match x with
| .success _ xs => s!"{xs}"
| .failure _ xs _ => s!"{xs}"
| .gaveUp _ => ""
let arguments := mkObj [] -- TODO: Actually break out the arguments
let features := mkObj [] -- TODO: Add features
let property := thmName
let runStart := .num now
mkObj [
("type", "test_case"),
("status", status),
("status_reason", ""), -- NOTE: Could add
("representation", representation),
("arguments", arguments),
("how_generated", ""), -- NOTE: Could add
("features", features),
("coverage", mkObj []),
("timing", mkObj []),
("metadata", mkObj []),
("property", property),
("run_start", runStart),
]
let jsonlines := xs.map transformLine
let results := String.intercalate "\n" (jsonlines.map (fun x => s!"{compress x}")) ++ "\n"
let fname := s!"theorem_{thmName}"
let dir := ".lean/observations"
let fpath := s!"{dir}/{fname}.jsonl"
IO.FS.createDirAll dir
let h ← IO.FS.Handle.mk fpath IO.FS.Mode.append
h.putStr results
Lean.logInfo s!"Wrote data to {fpath}"

open Decorations in
/-- Run a test suite for `p` and throw an exception if `p` does not hold. -/
def Testable.check (p : Prop) (cfg : Configuration := {})
(p' : Decorations.DecorationsOf p := by mk_decorations) [Testable p'] : Lean.CoreM PUnit := do
match ← Testable.checkIO p' cfg with
| TestResult.success _ => if !cfg.quiet then Lean.logInfo "Unable to find a counter-example"
let r ← Testable.checkIO p' cfg
if let some thmName := cfg.detailedReportingWithName then
writeReport thmName (r.testResult :: r.history).reverse
match r.testResult with
| TestResult.success _ _ => if !cfg.quiet then Lean.logInfo "Unable to find a counter-example"
| TestResult.gaveUp n =>
if !cfg.quiet then
let msg := s!"Gave up after failing to generate values that fulfill the preconditions {n} times."
Expand Down
4 changes: 4 additions & 0 deletions Test/Tyche.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import Plausible

theorem add_comm : ∀ (a b : Nat), a < b -> a + b = b + a := by
plausible (config := {detailedReportingWithName := "add_comm"})