Skip to content

Commit 35cb4fa

Browse files
committed
Add mapping/conversion of OpenAI's reasoning effort to Anthropic's thinking budget (configurable via a config file), plus examples!
1 parent 2b4a909 commit 35cb4fa

File tree

9 files changed

+560
-14
lines changed

9 files changed

+560
-14
lines changed

anthropic-client/src/main/scala/io/cequence/openaiscala/anthropic/service/impl/package.scala

Lines changed: 110 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ import io.cequence.openaiscala.domain.response.{
2323
ChatCompletionChunkResponse,
2424
ChatCompletionResponse,
2525
ChunkMessageSpec,
26+
CompletionTokenDetails,
2627
PromptTokensDetails,
2728
UsageInfo => OpenAIUsageInfo
2829
}
2930
import io.cequence.openaiscala.domain.settings.{
3031
ChatCompletionResponseFormatType,
31-
CreateChatCompletionSettings
32+
CreateChatCompletionSettings,
33+
ReasoningEffort
3234
}
3335
import io.cequence.openaiscala.domain.settings.CreateChatCompletionSettingsOps.RichCreateChatCompletionSettings
3436
import io.cequence.openaiscala.domain.{
@@ -43,11 +45,12 @@ import io.cequence.openaiscala.domain.{
4345
UserMessage => OpenAIUserMessage,
4446
UserSeqMessage => OpenAIUserSeqMessage
4547
}
48+
import io.cequence.openaiscala.service.HasOpenAIConfig
4649
import org.slf4j.LoggerFactory
4750

4851
import java.{util => ju}
4952

50-
package object impl extends AnthropicServiceConsts {
53+
package object impl extends AnthropicServiceConsts with HasOpenAIConfig {
5154

5255
private val logger: Logger = Logger(
5356
LoggerFactory.getLogger("io.cequence.openaiscala.anthropic.service.impl")
@@ -168,10 +171,61 @@ package object impl extends AnthropicServiceConsts {
168171
}
169172
}
170173

174+
/**
175+
* Converts OpenAI's reasoning_effort to Anthropic's thinking budget using the configured
176+
* mapping.
177+
*
178+
* @param reasoningEffort
179+
* The reasoning effort level from OpenAI settings
180+
* @return
181+
* Thinking budget in tokens, or None if reasoning_effort is None or budget is 0
182+
*/
183+
private def toThinkingBudget(
184+
reasoningEffort: Option[ReasoningEffort]
185+
): Option[Int] = {
186+
import io.cequence.wsclient.ConfigImplicits._
187+
188+
reasoningEffort.flatMap { effort =>
189+
val effortKey = effort.toString.toLowerCase
190+
val configPath =
191+
s"$configPrefix.reasoning-effort-thinking-budget-mapping.$effortKey.anthropic"
192+
193+
clientConfig.optionalInt(configPath) match {
194+
case Some(budget) =>
195+
logger.debug(
196+
s"Converting reasoning effort '$effortKey' to Anthropic thinking budget: $budget"
197+
)
198+
199+
if (budget == 0) {
200+
// budget = 0 means "don't enable extended thinking at all"
201+
// Return None to omit the thinking block instead of sending budget_tokens=0
202+
None
203+
} else if (budget < 1024) {
204+
// Anthropic minimum is 1024
205+
logger.warn(
206+
s"Thinking budget $budget is below Anthropic minimum of 1024. Clamping to 1024."
207+
)
208+
Some(1024)
209+
} else {
210+
Some(budget)
211+
}
212+
213+
case None =>
214+
logger.warn(
215+
s"No thinking budget mapping found for reasoning effort '$effortKey' in config path: $configPath"
216+
)
217+
None
218+
}
219+
}
220+
}
221+
171222
def toAnthropicSettings(
172223
settings: CreateChatCompletionSettings
173224
): AnthropicCreateMessageSettings = {
174-
val thinkingBudget = settings.anthropicThinkingBudgetTokens
225+
// Prioritize explicit thinking budget, fall back to reasoning_effort conversion
226+
val thinkingBudget = settings.anthropicThinkingBudgetTokens.orElse(
227+
toThinkingBudget(settings.reasoning_effort)
228+
)
175229

176230
// handle json schema
177231
val responseFormat =
@@ -200,12 +254,31 @@ package object impl extends AnthropicServiceConsts {
200254
} else
201255
None
202256

257+
// When thinking is enabled, temperature must be 1.0
258+
val temperature = thinkingBudget match {
259+
case Some(_) =>
260+
// Thinking is enabled
261+
settings.temperature match {
262+
case Some(temp) if temp != 1.0 =>
263+
logger.warn(
264+
s"Temperature is set to $temp but thinking is enabled. Anthropic requires temperature=1 when using extended thinking. Overriding to 1.0."
265+
)
266+
Some(1.0)
267+
case other =>
268+
// No temperature set or already 1.0, keep as is
269+
other
270+
}
271+
case None =>
272+
// No thinking, use original temperature
273+
settings.temperature
274+
}
275+
203276
AnthropicCreateMessageSettings(
204277
model = settings.model,
205278
max_tokens = settings.max_tokens.getOrElse(DefaultSettings.CreateMessage.max_tokens),
206279
metadata = Map.empty,
207280
stop_sequences = settings.stop,
208-
temperature = settings.temperature,
281+
temperature = temperature,
209282
top_p = settings.top_p,
210283
top_k = None,
211284
thinking = thinkingBudget.map(ThinkingSettings(_)),
@@ -215,7 +288,15 @@ package object impl extends AnthropicServiceConsts {
215288
)
216289
}
217290

218-
def toOpenAI(response: CreateMessageResponse): ChatCompletionResponse =
291+
def toOpenAI(response: CreateMessageResponse): ChatCompletionResponse = {
292+
// Extract thinking blocks and estimate token count
293+
val thinkingText = response.thinkingText
294+
val thinkingTokens = if (thinkingText.nonEmpty) {
295+
Some(estimateTokenCount(thinkingText))
296+
} else {
297+
None
298+
}
299+
219300
ChatCompletionResponse(
220301
id = response.id,
221302
created = new ju.Date(),
@@ -229,9 +310,10 @@ package object impl extends AnthropicServiceConsts {
229310
logprobs = None
230311
)
231312
),
232-
usage = Some(toOpenAI(response.usage)),
313+
usage = Some(toOpenAI(response.usage, thinkingTokens)),
233314
originalResponse = Some(response)
234315
)
316+
}
235317

236318
def toOpenAI(blockDelta: ContentBlockDelta): ChatCompletionChunkResponse =
237319
ChatCompletionChunkResponse(
@@ -258,7 +340,7 @@ package object impl extends AnthropicServiceConsts {
258340
def toOpenAIAssistantMessage(content: ContentBlocks): OpenAIAssistantMessage = {
259341
val textContents = content.blocks.collect { case ContentBlockBase(TextBlock(text, _), _) =>
260342
text
261-
} // TODO
343+
}
262344
// TODO: log if there is more than one text content
263345
if (textContents.isEmpty) {
264346
throw new IllegalArgumentException("No text content found in the response")
@@ -270,7 +352,10 @@ package object impl extends AnthropicServiceConsts {
270352
private def concatenateMessages(messageContent: Seq[String]): String =
271353
messageContent.mkString("\n")
272354

273-
def toOpenAI(usageInfo: UsageInfo): OpenAIUsageInfo = {
355+
def toOpenAI(
356+
usageInfo: UsageInfo,
357+
thinkingTokens: Option[Int] = None
358+
): OpenAIUsageInfo = {
274359
val promptTokens =
275360
usageInfo.input_tokens +
276361
usageInfo.cache_creation_input_tokens.getOrElse(0) +
@@ -286,7 +371,23 @@ package object impl extends AnthropicServiceConsts {
286371
audio_tokens = None
287372
)
288373
),
289-
completion_tokens_details = None
374+
completion_tokens_details = thinkingTokens.map { tokens =>
375+
CompletionTokenDetails(
376+
reasoning_tokens = tokens,
377+
accepted_prediction_tokens = None,
378+
rejected_prediction_tokens = None
379+
)
380+
}
290381
)
291382
}
383+
384+
/**
385+
* Estimates the number of tokens in a given text using a simple heuristic. This is an
386+
* approximation: tokens ≈ characters / 4 (average for English text) For more accurate
387+
* counting, a proper tokenizer should be used.
388+
*/
389+
// TODO
390+
private def estimateTokenCount(text: String): Int = {
391+
math.max(1, text.length / 4)
392+
}
292393
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.cequence.openaiscala.gemini.domain
2+
3+
import io.cequence.wsclient.domain.EnumValue
4+
5+
sealed trait ThinkingLevel extends EnumValue
6+
7+
object ThinkingLevel {
8+
case object THINKING_LEVEL_UNSPECIFIED extends ThinkingLevel
9+
case object LOW extends ThinkingLevel
10+
case object HIGH extends ThinkingLevel
11+
12+
def values: Seq[ThinkingLevel] = Seq(
13+
THINKING_LEVEL_UNSPECIFIED,
14+
LOW,
15+
HIGH
16+
)
17+
}

openai-examples/src/main/resources/logback.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
<logger name="PlayWSStreamClientEngine" level="INFO"/>
1717

1818
<logger name="OpenAIChatCompletionExtra" level="DEBUG"/>
19+
<logger name="io.cequence.openaiscala.anthropic.service.impl" level="DEBUG"/>
20+
<logger name="io.cequence.openaiscala.gemini.service.impl.OpenAIGeminiChatCompletionService" level="DEBUG"/>
1921

2022
<!--
2123
<logger name="com.ning.http.client" level="INFO"/>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package io.cequence.openaiscala.examples.anthropic
2+
3+
import io.cequence.openaiscala.domain.settings.{
4+
ChatCompletionResponseFormatType,
5+
CreateChatCompletionSettings,
6+
JsonSchemaDef
7+
}
8+
import io.cequence.openaiscala.domain.{
9+
JsonSchema,
10+
NonOpenAIModelId,
11+
SystemMessage,
12+
UserMessage
13+
}
14+
import io.cequence.openaiscala.examples.{ChatCompletionProvider, ExampleBase}
15+
import io.cequence.openaiscala.service.OpenAIChatCompletionExtra.OpenAIChatCompletionImplicits
16+
import io.cequence.openaiscala.service.OpenAIChatCompletionService
17+
import play.api.libs.json.{JsObject, Json}
18+
19+
import scala.concurrent.Future
20+
21+
// requires `openai-scala-anthropic-client` as a dependency and `ANTHROPIC_API_KEY` environment variable to be set
22+
object AnthropicCreateChatCompletionWithJsonSchemaAndOpenAIAdapter
23+
extends ExampleBase[OpenAIChatCompletionService] {
24+
25+
override val service: OpenAIChatCompletionService =
26+
ChatCompletionProvider.anthropic()
27+
28+
// Define the JSON schema for weather responses
29+
// Note: Anthropic will automatically set 'additionalProperties: false' on all objects where it's not specified
30+
private val weatherSchema: JsonSchema = JsonSchema.Object(
31+
properties = Seq(
32+
"response" -> JsonSchema.Array(
33+
items = JsonSchema.Object(
34+
properties = Seq(
35+
"city" -> JsonSchema.String(),
36+
"temperature" -> JsonSchema.String(),
37+
"weather" -> JsonSchema.String()
38+
),
39+
required = Seq("city", "temperature", "weather")
40+
)
41+
)
42+
),
43+
required = Seq("response")
44+
)
45+
46+
private val weatherSchemaDef = JsonSchemaDef(
47+
name = "weather_response",
48+
strict =
49+
true, // Note: strict mode is not supported by Anthropic but will be ignored gracefully
50+
structure = Left(weatherSchema)
51+
)
52+
53+
private val messages = Seq(
54+
SystemMessage("You are a helpful weather assistant that responds in JSON."),
55+
UserMessage("What is the weather like in Norway? List several cities.")
56+
)
57+
58+
override protected def run: Future[_] =
59+
service
60+
.createChatCompletionWithJSON[JsObject](
61+
messages = messages,
62+
settings = CreateChatCompletionSettings(
63+
model = NonOpenAIModelId.claude_sonnet_4_5_20250929,
64+
max_tokens = Some(16000),
65+
response_format_type = Some(ChatCompletionResponseFormatType.json_schema),
66+
jsonSchema = Some(weatherSchemaDef)
67+
)
68+
)
69+
.map { json =>
70+
println(Json.prettyPrint(json))
71+
}
72+
}

0 commit comments

Comments
 (0)