@@ -511,6 +511,7 @@ description: Testing JSON with failing evaluators
511511model: openai/gpt-4o
512512testData:
513513 - input: "hello"
514+ expected: "hello world"
514515messages:
515516 - role: user
516517 content: "{{input}}"
@@ -553,18 +554,94 @@ evaluators:
553554
554555 output := out .String ()
555556
557+ // Verify JSON structure
556558 var result EvaluationSummary
557559 err = json .Unmarshal ([]byte (output ), & result )
558560 require .NoError (t , err )
559561
560- // Verify failing test is properly represented
561- require .Equal (t , 1 , result .Summary .TotalTests )
562- require .Equal (t , 0 , result .Summary .PassedTests )
563- require .Equal (t , 1 , result .Summary .FailedTests )
564- require .Equal (t , 0.0 , result .Summary .PassRate )
562+ // Verify JSON doesn't contain human-readable text
563+ require .NotContains (t , output , "Running evaluation:" )
564+ })
565+
// Subtest: checks that `responseFormat: json_schema` and the `jsonSchema`
// block declared in a prompt file are forwarded on the chat-completion
// request issued by the eval command, and that the run is reported as passing.
566+ t .Run ("eval with responseFormat and jsonSchema" , func (t * testing.T ) {
// Prompt definition written to disk below. It declares a strict schema named
// "response_schema" and a single string evaluator that looks for "message".
567+ const yamlBody = `
568+ name: JSON Schema Evaluation
569+ description: Testing responseFormat and jsonSchema in eval
570+ model: openai/gpt-4o
571+ responseFormat: json_schema
572+ jsonSchema:
573+ name: response_schema
574+ strict: true
575+ schema:
576+ type: object
577+ properties:
578+ message:
579+ type: string
580+ description: The response message
581+ confidence:
582+ type: number
583+ description: Confidence score
584+ required:
585+ - message
586+ additionalProperties: false
587+ testData:
588+ - input: "hello"
589+ expected: "hello world"
590+ messages:
591+ - role: user
592+ content: "Respond to: {{input}}"
593+ evaluators:
594+ - name: contains-message
595+ string:
596+ contains: "message"
597+ `
565598
// NOTE(review): the three lines below carry a `-` diff marker, i.e. they are
// deletions shown by this diff view rather than statements of this subtest;
// they reference `result`, which this subtest never declares.
566- require .Len (t , result .TestResults , 1 )
567- require .False (t , result .TestResults [0 ].EvaluationResults [0 ].Passed )
568- require .Equal (t , 0.0 , result .TestResults [0 ].EvaluationResults [0 ].Score )
// Write the prompt into a per-test temporary directory.
599+ tmpDir := t .TempDir ()
600+ promptFile := filepath .Join (tmpDir , "test.prompt.yml" )
601+ err := os .WriteFile (promptFile , []byte (yamlBody ), 0644 )
602+ require .NoError (t , err )
603+
// Mock client whose streaming hook records the options it was called with,
// so the assertions further down can inspect what the command sent.
604+ client := azuremodels .NewMockClient ()
605+ var capturedRequest azuremodels.ChatCompletionOptions
606+ client .MockGetChatCompletionStream = func (ctx context.Context , req azuremodels.ChatCompletionOptions , org string ) (* azuremodels.ChatCompletionResponse , error ) {
607+ capturedRequest = req
// Canned reply: a JSON object that contains a "message" key — presumably what
// lets the `contains: "message"` evaluator pass (TODO confirm against the
// evaluator implementation).
608+ response := `{"message": "hello world", "confidence": 0.95}`
609+ reader := sse .NewMockEventReader ([]azuremodels.ChatCompletion {
610+ {
611+ Choices : []azuremodels.ChatChoice {
612+ {
613+ Message : & azuremodels.ChatChoiceMessage {
614+ Content : & response ,
615+ },
616+ },
617+ },
618+ },
619+ })
620+ return & azuremodels.ChatCompletionResponse {Reader : reader }, nil
621+ }
622+
// The same buffer is passed as both of the first two NewConfig arguments.
// NOTE(review): the meaning of the trailing `true, 100` arguments is not
// visible from here — confirm against command.NewConfig.
623+ out := new (bytes.Buffer )
624+ cfg := command .NewConfig (out , out , client , true , 100 )
625+
626+ cmd := NewEvalCommand (cfg )
627+ cmd .SetArgs ([]string {promptFile })
628+
629+ err = cmd .Execute ()
630+ require .NoError (t , err )
631+
632+ // Verify that responseFormat and jsonSchema were included in the request
633+ require .NotNil (t , capturedRequest .ResponseFormat )
634+ require .Equal (t , "json_schema" , capturedRequest .ResponseFormat .Type )
635+ require .NotNil (t , capturedRequest .ResponseFormat .JsonSchema )
636+
// JsonSchema is a pointer (checked non-nil above); the dereferenced value is
// indexed by string key to compare against the values declared in the prompt.
637+ schema := * capturedRequest .ResponseFormat .JsonSchema
638+ require .Equal (t , "response_schema" , schema ["name" ])
639+ require .Equal (t , true , schema ["strict" ])
640+ require .Contains (t , schema , "schema" )
641+
642+ // Verify the test passed
643+ output := out .String ()
644+ require .Contains (t , output , "✓ PASSED" )
645+ require .Contains (t , output , "🎉 All tests passed!" )
569646 })
570647}
0 commit comments