@@ -48,6 +48,8 @@ type EvaluationResult struct {
4848 Details string `json:"details,omitempty"`
4949}
5050
// FailedTests is the sentinel error returned by runEvaluation when at least
// one test did not pass. The command callback in NewEvalCommand checks for it
// so that cobra's usage text is suppressed for failed evaluations.
// NOTE(review): Go convention names sentinel errors ErrXxx (ErrFailedTests);
// renaming would change the exported API, so confirm with callers first.
51+ var FailedTests = errors .New ("❌ Some tests failed." )
52+
5153// NewEvalCommand returns a new command to evaluate prompts against models
5254func NewEvalCommand (cfg * command.Config ) * cobra.Command {
5355 cmd := & cobra.Command {
@@ -106,7 +108,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {
106108 jsonOutput : jsonOutput ,
107109 }
108110
109- return handler .runEvaluation (cmd .Context ())
111+ err = handler .runEvaluation (cmd .Context ())
112+ if err == FailedTests {
113+ // Cobra by default will show the help message when an error occurs,
114+ // which is not what we want for failed evaluations.
115+ // Instead, we just want to exit with a non-zero code.
116+ cmd .SilenceUsage = true
117+ }
118+ return err
110119 },
111120 }
112121
@@ -206,6 +215,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
206215 h .printSummary (passedTests , totalTests , passRate )
207216 }
208217
218+ if totalTests - passedTests > 0 {
219+ return FailedTests
220+ }
221+
209222 return nil
210223}
211224
@@ -249,8 +262,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate
249262
250263 if passedTests == totalTests {
251264 h .cfg .WriteToOut ("🎉 All tests passed!\n " )
252- } else {
253- h .cfg .WriteToOut ("❌ Some tests failed.\n " )
254265 }
255266}
256267
0 commit comments