When evals fail, exit with status code 1 (#61)

maraisr · web-flow · commit 223a61920a43 · 2025-06-10T15:38:31.000+10:00
diff --git a/cmd/eval/eval.go b/cmd/eval/eval.go
@@ -48,6 +48,8 @@ type EvaluationResult struct {
 	Details       string  `json:"details,omitempty"`
 }
 
+var FailedTests = errors.New("❌ Some tests failed.")
+
 // NewEvalCommand returns a new command to evaluate prompts against models
 func NewEvalCommand(cfg *command.Config) *cobra.Command {
 	cmd := &cobra.Command{
@@ -106,7 +108,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {
 				jsonOutput: jsonOutput,
 			}
 
-			return handler.runEvaluation(cmd.Context())
+			err = handler.runEvaluation(cmd.Context())
+			if err == FailedTests {
+				// Cobra by default will show the help message when an error occurs,
+				// which is not what we want for failed evaluations.
+				// Instead, we just want to exit with a non-zero code.
+				cmd.SilenceUsage = true
+			}
+			return err
 		},
 	}
 
@@ -206,6 +215,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
 		h.printSummary(passedTests, totalTests, passRate)
 	}
 
+	if totalTests-passedTests > 0 {
+		return FailedTests
+	}
+
 	return nil
 }
 
@@ -249,8 +262,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate
 
 	if passedTests == totalTests {
 		h.cfg.WriteToOut("🎉 All tests passed!\n")
-	} else {
-		h.cfg.WriteToOut("❌ Some tests failed.\n")
 	}
 }
 
diff --git a/cmd/eval/eval_test.go b/cmd/eval/eval_test.go
@@ -306,7 +306,7 @@ evaluators:
 		cmd.SetArgs([]string{promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 		require.Contains(t, output, "Failing Test")
@@ -376,7 +376,7 @@ evaluators:
 		cmd.SetArgs([]string{"--json", promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 
@@ -549,7 +549,7 @@ evaluators:
 		cmd.SetArgs([]string{"--json", promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,8 @@ type EvaluationResult struct {`
`48`	`48`	`` Details string `json:"details,omitempty"` ``
`49`	`49`	`}`
`50`	`50`
	`51`	`+var FailedTests = errors.New("❌ Some tests failed.")`
	`52`	`+`
`51`	`53`	`// NewEvalCommand returns a new command to evaluate prompts against models`
`52`	`54`	`func NewEvalCommand(cfg *command.Config) *cobra.Command {`
`53`	`55`	`cmd := &cobra.Command{`
`@@ -106,7 +108,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {`
`106`	`108`	`jsonOutput: jsonOutput,`
`107`	`109`	`}`
`108`	`110`
`109`		`- return handler.runEvaluation(cmd.Context())`
	`111`	`+ err = handler.runEvaluation(cmd.Context())`
	`112`	`+ if err == FailedTests {`
	`113`	`+ // Cobra by default will show the help message when an error occurs,`
	`114`	`+ // which is not what we want for failed evaluations.`
	`115`	`+ // Instead, we just want to exit with a non-zero code.`
	`116`	`+ cmd.SilenceUsage = true`
	`117`	`+ }`
	`118`	`+ return err`
`110`	`119`	`},`
`111`	`120`	`}`
`112`	`121`
`@@ -206,6 +215,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {`
`206`	`215`	`h.printSummary(passedTests, totalTests, passRate)`
`207`	`216`	`}`
`208`	`217`
	`218`	`+ if totalTests-passedTests > 0 {`
	`219`	`+ return FailedTests`
	`220`	`+ }`
	`221`	`+`
`209`	`222`	`return nil`
`210`	`223`	`}`
`211`	`224`
`@@ -249,8 +262,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate`
`249`	`262`
`250`	`263`	`if passedTests == totalTests {`
`251`	`264`	`h.cfg.WriteToOut("🎉 All tests passed!\n")`
`252`		`- } else {`
`253`		`- h.cfg.WriteToOut("❌ Some tests failed.\n")`
`254`	`265`	`}`
`255`	`266`	`}`
`256`	`267`