Skip to content

Commit fcd5f5b

Browse files
committed
OpenAI - grader functions/endpoints added: runGrader, validateGrader with examples
1 parent 7dc4cc2 commit fcd5f5b

File tree

8 files changed

+359
-3
lines changed

8 files changed

+359
-3
lines changed

openai-client/src/main/scala/io/cequence/openaiscala/service/impl/EndPoint.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ object EndPoint {
2626
case object vector_stores extends EndPoint
2727
case object runs extends EndPoint
2828
case object responses extends EndPoint
29+
case object graders extends EndPoint("fine_tuning/alpha/graders")
2930
}
3031

3132
sealed abstract class Param(value: String = "") extends NamedEnumValue(value)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package io.cequence.openaiscala.service.impl
2+
3+
import io.cequence.openaiscala.OpenAIScalaClientException
4+
import io.cequence.openaiscala.domain.graders.Grader
5+
import io.cequence.openaiscala.domain.graders.JsonFormats._
6+
import io.cequence.openaiscala.service.OpenAIGraderService
7+
import io.cequence.wsclient.JsonUtil.StringAnyMapFormat
8+
import io.cequence.wsclient.ResponseImplicits._
9+
import play.api.libs.json.{JsObject, Json}
10+
11+
import scala.concurrent.Future
12+
13+
/**
 * Web-service backed implementation of [[OpenAIGraderService]].
 *
 * Both calls POST a JSON body to `EndPoint.graders`, selecting the concrete operation through
 * the end point parameter (`run` or `validate`).
 */
trait OpenAIGraderServiceImpl extends OpenAIGraderService with OpenAIServiceWSBase {

  /**
   * Sends the grader, the model sample and the dataset item to the `run` operation and
   * returns the response body as a plain string.
   */
  override def runGrader(
    grader: Grader,
    modelSample: String,
    item: Map[String, Any]
  ): Future[String] =
    execPOSTBody(
      EndPoint.graders,
      endPointParam = Some("run"),
      body = Json.obj(
        "grader" -> Json.toJson(grader),
        "model_sample" -> modelSample,
        // the item is an untyped map, hence the explicit format
        "item" -> Json.toJson(item)(StringAnyMapFormat)
      )
    ).map(response => response.string)

  /**
   * Sends the grader to the `validate` operation and reads the `grader` field of the JSON
   * response back into a [[Grader]].
   *
   * The returned future fails with an [[OpenAIScalaClientException]] when the `grader` field
   * is missing or cannot be read as a [[Grader]].
   */
  override def validateGrader(
    grader: Grader
  ): Future[Grader] = {
    val request = Json.obj("grader" -> Json.toJson(grader))

    execPOSTBody(
      EndPoint.graders,
      endPointParam = Some("validate"),
      body = request
    ).map { response =>
      val json = response.asSafeJson[JsObject]

      // getOrElse takes its argument by name, so the exception is only raised on None
      (json \ "grader").asOpt[Grader].getOrElse(
        throw new OpenAIScalaClientException(
          s"Validated grader not found in response. The response JSON: ${json}"
        )
      )
    }
  }
}

openai-client/src/main/scala/io/cequence/openaiscala/service/impl/OpenAIServiceImpl.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import scala.util.{Failure, Success, Try}
3030
private[service] trait OpenAIServiceImpl
3131
extends OpenAICoreServiceImpl
3232
with OpenAIResponseServiceImpl
33+
with OpenAIGraderServiceImpl
3334
with OpenAIService
3435
with HandleOpenAIErrorCodes { // TODO: should HandleOpenAIErrorCodes be here?
3536

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package io.cequence.openaiscala.service
2+
3+
import io.cequence.openaiscala.domain.graders.Grader
4+
import scala.concurrent.Future
5+
6+
/**
 * Service interface for OpenAI Graders API endpoints.
 *
 * The Graders API provides a way to evaluate model outputs against specific criteria using
 * graders.
 *
 *   - Available Functions:
 *
 * '''Run Grader'''
 *   - [[runGrader]] - Runs a grader to evaluate a model sample against a dataset item
 *   - [[https://platform.openai.com/docs/api-reference/graders/run API Doc]]
 *
 * '''Validate Grader'''
 *   - [[validateGrader]] - Validates a grader
 *   - [[https://platform.openai.com/docs/api-reference/graders/validate API Doc]]
 *
 * @see
 *   <a href="https://platform.openai.com/docs/api-reference/graders">OpenAI Graders API
 *   Doc</a>
 */
trait OpenAIGraderService extends OpenAIServiceConsts {

  /**
   * Runs a grader to evaluate a model sample against a dataset item.
   *
   * @param grader
   *   The grader configuration used for evaluation
   * @param modelSample
   *   The model sample to be evaluated. This value will be used to populate the sample
   *   namespace. The output_json variable will be populated if the model sample is a valid
   *   JSON string
   * @param item
   *   The dataset item provided to the grader. This will be used to populate the item
   *   namespace
   * @return
   *   The evaluation result as a string
   *
   * @see
   *   <a href="https://platform.openai.com/docs/api-reference/graders/run">OpenAI Doc</a>
   */
  def runGrader(
    grader: Grader,
    modelSample: String,
    item: Map[String, Any]
  ): Future[String]

  /**
   * Validates a grader.
   *
   * @param grader
   *   The grader used for the fine-tuning job.
   * @return
   *   The validated grader object.
   *
   * @see
   *   <a href="https://platform.openai.com/docs/api-reference/graders/validate">OpenAI Doc</a>
   */
  def validateGrader(
    grader: Grader
  ): Future[Grader]
}

openai-core/src/main/scala/io/cequence/openaiscala/service/OpenAIResponsesService.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import scala.concurrent.Future
1919
* The Responses API provides a unified interface for creating and managing model responses
2020
* with support for various tools including file search, web search, and custom functions.
2121
*
22-
* ==Available Functions==
22+
* - Available Functions:
2323
*
2424
* '''Create Response'''
2525
* - [[createModelResponse]] - Creates a new model response from inputs with support for

openai-core/src/main/scala/io/cequence/openaiscala/service/OpenAIService.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,16 @@ import scala.concurrent.Future
4343
* and deleteAssistant
4444
* - '''Assistant Files''': createAssistantFile, listAssistantFiles, retrieveAssistantFile,
4545
* and deleteAssistantFile
46-
* - ''''Responses''' - createModelResponse, getModelResponse, deleteModelResponse,
46+
* - '''Responses''': createModelResponse, getModelResponse, deleteModelResponse,
4747
* cancelModelResponse, getModelResponseInputTokenCounts, and listModelResponseInputItems
48+
* - '''Graders''': runGrader and validateGrader
4849
* @since Sep
4950
* 2024
5051
*/
51-
trait OpenAIService extends OpenAICoreService with OpenAIResponsesService {
52+
trait OpenAIService
53+
extends OpenAICoreService
54+
with OpenAIResponsesService
55+
with OpenAIGraderService {
5256

5357
/**
5458
* Retrieves a model instance, providing basic information about the model such as the owner
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package io.cequence.openaiscala.examples.graders
2+
3+
import io.cequence.openaiscala.domain.{ChatRole, ModelId}
4+
import io.cequence.openaiscala.domain.graders._
5+
import io.cequence.openaiscala.examples.Example
6+
7+
import scala.concurrent.Future
8+
9+
/**
 * Example: scores a single model answer with a `ScoreModelGrader` through
 * `service.runGrader` and prints whatever the service returns.
 */
object RunGrader extends Example {

  // Only the following models are allowed:
  // - gpt-4o-2024-08-06, gpt-4o-mini-2024-07-18,
  // - gpt-4.1-2025-04-14, gpt-4.1-mini-2025-04-14, gpt-4.1-nano-2025-04-14,
  // - o1-2024-12-17, o3-mini-2025-01-31, o4-mini-2025-04-16, o3-2025-04-16

  // Try gpt-4o-mini first as it may have fewer restrictions
  val gradingModel = ModelId.gpt_4o_mini_2024_07_18

  def run: Future[Unit] = {
    // Conversation handed to the grading model: instructions, the question, the answer
    val instructions = GraderModelInput(
      content = GraderInputContent.TextString(
        "Rate the helpfulness of the following response on a scale from 0 to 1:"
      ),
      role = ChatRole.System
    )

    val question = GraderModelInput(
      content = GraderInputContent.InputText("{{item.question}}"),
      role = ChatRole.User
    )

    val answer = GraderModelInput(
      content = GraderInputContent.OutputText("{{sample.output_json}}"),
      role = ChatRole.Assistant
    )

    // Temperature is intentionally left unset here (previously tried: Some(0.3))
    val sampling = SamplingParams(
      maxCompletionsTokens = Some(100)
    )

    // The ScoreModelGrader that evaluates the quality of the model's response
    val helpfulnessGrader = ScoreModelGrader(
      input = Seq(instructions, question, answer),
      model = gradingModel,
      name = "helpfulness_scorer",
      range = Seq(0.0, 1.0),
      samplingParams = Some(sampling)
    )

    // Model output to be evaluated
    val sampleToGrade = """{"answer": "The capital of France is Paris."}"""

    // Dataset item carrying the question referenced by the grader template
    val datasetItem = Map[String, Any](
      "question" -> "What is the capital of France?"
    )

    val evaluation = service.runGrader(
      grader = helpfulnessGrader,
      modelSample = sampleToGrade,
      item = datasetItem
    )

    evaluation.map(result => println(s"Grader evaluation result: $result"))
  }
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package io.cequence.openaiscala.examples.graders
2+
3+
import io.cequence.openaiscala.domain.{ChatRole, ModelId}
4+
import io.cequence.openaiscala.domain.graders._
5+
import io.cequence.openaiscala.domain.settings.ReasoningEffort
6+
import io.cequence.openaiscala.examples.Example
7+
8+
import scala.concurrent.Future
9+
10+
/**
 * Example: sends one grader of each kind to `service.validateGrader` and prints either the
 * validated grader's details or the validation failure.
 */
object ValidateGrader extends Example {

  private val graders = Seq(
    StringGrader(
      input = "{{sample.output_json}}",
      name = "exact_match_validator",
      operation = StringCheckOperation.eq,
      reference = "{{item.expected_answer}}"
    ),
    ScoreModelGrader(
      input = Seq(
        GraderModelInput(
          role = ChatRole.User,
          content = GraderInputContent.TextString(
            "Score how close the reference answer is to the model answer. Score 1.0 if they are the same and 0.0 if they are different." +
              " Return just a floating point score\n\n" +
              " Reference answer: {{item.label}}\n\n" +
              " Model answer: {{sample.output_text}}"
          )
        )
      ),
      model = ModelId.o4_mini_2025_04_16,
      name = "Example score model grader",
      samplingParams = Some(
        SamplingParams(
          temperature = Some(1.0),
          topP = Some(1.0),
          seed = Some(42),
          maxCompletionsTokens = Some(32768),
          reasoningEffort = Some(ReasoningEffort.medium)
        )
      )
    ),
    LabelModelGrader(
      input = Seq(
        GraderModelInput(
          role = ChatRole.System,
          content = GraderInputContent.InputText(
            "Classify the sentiment of the following statement as one of positive, neutral, or negative"
          )
        ),
        GraderModelInput(
          role = ChatRole.User,
          content = GraderInputContent.InputText(
            "Statement: {{item.response}}"
          )
        )
      ),
      labels = Seq("positive", "neutral", "negative"),
      model = ModelId.gpt_4o_2024_08_06,
      name = "First label grader",
      passingLabels = Seq("positive")
    ),
    PythonGrader(
      imageTag = "2025-05-08",
      name = "Example python grader",
      // Triple-quoted Scala strings are raw: a backslash-escaped quote would reach the API
      // verbatim and is not valid Python, so the docstring uses ''' instead. stripMargin
      // keeps the Python indentation explicit and independent of the Scala indentation.
      source =
        """def grade(sample: dict, item: dict) -> float:
          |    '''
          |    Returns 1.0 if `output_text` equals `label`, otherwise 0.0.
          |    '''
          |    output = sample.get("output_text")
          |    label = item.get("label")
          |    return 1.0 if output == label else 0.0
          |""".stripMargin
    ),
    MultiGrader(
      name = "example multi grader",
      graders = Seq(
        TextSimilarityGrader(
          input = "The graded text",
          name = "example text similarity grader",
          reference = "The reference text",
          evaluationMetric = TextSimilarityEvaluationMetric.fuzzy_match
        ),
        StringGrader(
          input = "{{sample.output_text}}",
          name = "Example string check grader",
          operation = StringCheckOperation.eq,
          reference = "{{item.label}}"
        )
      ),
      // Formula with balanced parentheses (the previous literal ended in a stray ')')
      calculateOutput = "0.5 * text_similarity_score + 0.5 * string_check_score"
    )
  )

  /** Display name of a grader; "Unknown" for grader kinds not listed here. */
  private def graderName(grader: Grader): String =
    grader match {
      case sg: StringGrader      => sg.name
      case smg: ScoreModelGrader => smg.name
      case lmg: LabelModelGrader => lmg.name
      case pg: PythonGrader      => pg.name
      case mg: MultiGrader       => mg.name
      case _                     => "Unknown"
    }

  /** Prints the type-specific details of a grader returned by the validation call. */
  private def printDetails(validatedGrader: Grader): Unit =
    validatedGrader match {
      case sg: StringGrader =>
        println(s" Name: ${sg.name}")
        println(s" Operation: ${sg.operation}")
        println(s" Input: ${sg.input}")
        println(s" Reference: ${sg.reference}")

      case smg: ScoreModelGrader =>
        println(s" Name: ${smg.name}")
        println(s" Model: ${smg.model}")
        println(s" Input messages: ${smg.input.size}")
        println(s" Range: ${smg.range}")

      case lmg: LabelModelGrader =>
        println(s" Name: ${lmg.name}")
        println(s" Model: ${lmg.model}")
        println(s" Labels: ${lmg.labels.mkString(", ")}")
        println(s" Passing labels: ${lmg.passingLabels.mkString(", ")}")

      case pg: PythonGrader =>
        println(s" Name: ${pg.name}")
        println(s" Image tag: ${pg.imageTag}")
        println(s" Source code length: ${pg.source.length} chars")

      case mg: MultiGrader =>
        println(s" Name: ${mg.name}")
        println(s" Number of sub-graders: ${mg.graders.size}")
        println(s" Calculate output: ${mg.calculateOutput}")

      case other =>
        println(s" Grader: $other")
    }

  def run: Future[Unit] = {
    // Iterate through all graders and validate each one. The requests are started eagerly,
    // so the "Validating" lines are printed before any result arrives.
    val validationFutures = graders.zipWithIndex.map { case (grader, index) =>
      println(
        s"\n[${index + 1}/${graders.size}] Validating: ${grader.`type`} - ${graderName(grader)}"
      )

      service
        .validateGrader(grader)
        .map { validatedGrader =>
          println("✓ Grader validated successfully:")
          println(s" Type: ${validatedGrader.`type`}")

          printDetails(validatedGrader)
        }
        .recover { case e: Exception =>
          // A failed validation is reported, not propagated, so the other graders still run
          println(s"✗ Validation failed: ${e.getMessage}")
        }
    }

    // Wait for all validations to complete
    Future.sequence(validationFutures).map { _ =>
      println("\n" + "=" * 60)
      println(s"Validation complete: ${graders.size} grader(s) processed")
      println("=" * 60)
    }
  }
}

0 commit comments

Comments
 (0)