@@ -23,12 +23,14 @@ import io.cequence.openaiscala.domain.response.{
2323 ChatCompletionChunkResponse ,
2424 ChatCompletionResponse ,
2525 ChunkMessageSpec ,
26+ CompletionTokenDetails ,
2627 PromptTokensDetails ,
2728 UsageInfo => OpenAIUsageInfo
2829}
2930import io .cequence .openaiscala .domain .settings .{
3031 ChatCompletionResponseFormatType ,
31- CreateChatCompletionSettings
32+ CreateChatCompletionSettings ,
33+ ReasoningEffort
3234}
3335import io .cequence .openaiscala .domain .settings .CreateChatCompletionSettingsOps .RichCreateChatCompletionSettings
3436import io .cequence .openaiscala .domain .{
@@ -43,11 +45,12 @@ import io.cequence.openaiscala.domain.{
4345 UserMessage => OpenAIUserMessage ,
4446 UserSeqMessage => OpenAIUserSeqMessage
4547}
48+ import io .cequence .openaiscala .service .HasOpenAIConfig
4649import org .slf4j .LoggerFactory
4750
4851import java .{util => ju }
4952
50- package object impl extends AnthropicServiceConsts {
53+ package object impl extends AnthropicServiceConsts with HasOpenAIConfig {
5154
5255 private val logger : Logger = Logger (
5356 LoggerFactory .getLogger(" io.cequence.openaiscala.anthropic.service.impl" )
@@ -168,10 +171,61 @@ package object impl extends AnthropicServiceConsts {
168171 }
169172 }
170173
/**
 * Converts OpenAI's reasoning_effort to Anthropic's thinking budget using the configured
 * mapping.
 *
 * The budget is looked up in the client config under
 * `<configPrefix>.reasoning-effort-thinking-budget-mapping.<effort>.anthropic`, where
 * `<effort>` is the lower-cased name of the reasoning effort.
 *
 * @param reasoningEffort
 *   The reasoning effort level from OpenAI settings
 * @return
 *   Thinking budget in tokens (raised to 1024 when a smaller non-zero value is configured), or
 *   None if reasoning_effort is None, no mapping is configured for it, or the configured
 *   budget is 0
 */
private def toThinkingBudget(
  reasoningEffort: Option[ReasoningEffort]
): Option[Int] = {
  import io.cequence.wsclient.ConfigImplicits._

  reasoningEffort.flatMap { effort =>
    // Locale.ROOT keeps the config key independent of the JVM default locale
    // (a locale-sensitive toLowerCase can map 'I' to a dotless 'ı', e.g. under tr-TR).
    val effortKey = effort.toString.toLowerCase(ju.Locale.ROOT)
    val configPath =
      s"$configPrefix.reasoning-effort-thinking-budget-mapping.$effortKey.anthropic"

    clientConfig.optionalInt(configPath) match {
      case Some(budget) =>
        // Logs the raw configured value, i.e. before the zero/minimum handling below.
        logger.debug(
          s"Converting reasoning effort '$effortKey' to Anthropic thinking budget: $budget"
        )

        if (budget == 0) {
          // budget = 0 means "don't enable extended thinking at all"
          // Return None to omit the thinking block instead of sending budget_tokens=0
          None
        } else if (budget < 1024) {
          // Anthropic minimum is 1024
          logger.warn(
            s"Thinking budget $budget is below Anthropic minimum of 1024. Clamping to 1024."
          )
          Some(1024)
        } else {
          Some(budget)
        }

      case None =>
        logger.warn(
          s"No thinking budget mapping found for reasoning effort '$effortKey' in config path: $configPath"
        )
        None
    }
  }
}
221+
171222 def toAnthropicSettings (
172223 settings : CreateChatCompletionSettings
173224 ): AnthropicCreateMessageSettings = {
174- val thinkingBudget = settings.anthropicThinkingBudgetTokens
225+ // Prioritize explicit thinking budget, fall back to reasoning_effort conversion
226+ val thinkingBudget = settings.anthropicThinkingBudgetTokens.orElse(
227+ toThinkingBudget(settings.reasoning_effort)
228+ )
175229
176230 // handle json schema
177231 val responseFormat =
@@ -200,12 +254,31 @@ package object impl extends AnthropicServiceConsts {
200254 } else
201255 None
202256
257+ // When thinking is enabled, temperature must be 1.0
258+ val temperature = thinkingBudget match {
259+ case Some (_) =>
260+ // Thinking is enabled
261+ settings.temperature match {
262+ case Some (temp) if temp != 1.0 =>
263+ logger.warn(
264+ s " Temperature is set to $temp but thinking is enabled. Anthropic requires temperature=1 when using extended thinking. Overriding to 1.0. "
265+ )
266+ Some (1.0 )
267+ case other =>
268+ // No temperature set or already 1.0, keep as is
269+ other
270+ }
271+ case None =>
272+ // No thinking, use original temperature
273+ settings.temperature
274+ }
275+
203276 AnthropicCreateMessageSettings (
204277 model = settings.model,
205278 max_tokens = settings.max_tokens.getOrElse(DefaultSettings .CreateMessage .max_tokens),
206279 metadata = Map .empty,
207280 stop_sequences = settings.stop,
208- temperature = settings. temperature,
281+ temperature = temperature,
209282 top_p = settings.top_p,
210283 top_k = None ,
211284 thinking = thinkingBudget.map(ThinkingSettings (_)),
@@ -215,7 +288,15 @@ package object impl extends AnthropicServiceConsts {
215288 )
216289 }
217290
218- def toOpenAI (response : CreateMessageResponse ): ChatCompletionResponse =
291+ def toOpenAI (response : CreateMessageResponse ): ChatCompletionResponse = {
292+ // Extract thinking blocks and estimate token count
293+ val thinkingText = response.thinkingText
294+ val thinkingTokens = if (thinkingText.nonEmpty) {
295+ Some (estimateTokenCount(thinkingText))
296+ } else {
297+ None
298+ }
299+
219300 ChatCompletionResponse (
220301 id = response.id,
221302 created = new ju.Date (),
@@ -229,9 +310,10 @@ package object impl extends AnthropicServiceConsts {
229310 logprobs = None
230311 )
231312 ),
232- usage = Some (toOpenAI(response.usage)),
313+ usage = Some (toOpenAI(response.usage, thinkingTokens )),
233314 originalResponse = Some (response)
234315 )
316+ }
235317
236318 def toOpenAI (blockDelta : ContentBlockDelta ): ChatCompletionChunkResponse =
237319 ChatCompletionChunkResponse (
@@ -258,7 +340,7 @@ package object impl extends AnthropicServiceConsts {
258340 def toOpenAIAssistantMessage (content : ContentBlocks ): OpenAIAssistantMessage = {
259341 val textContents = content.blocks.collect { case ContentBlockBase (TextBlock (text, _), _) =>
260342 text
261- } // TODO
343+ }
262344 // TODO: log if there is more than one text content
263345 if (textContents.isEmpty) {
264346 throw new IllegalArgumentException (" No text content found in the response" )
@@ -270,7 +352,10 @@ package object impl extends AnthropicServiceConsts {
/**
 * Joins the given message fragments into a single string, placing a newline between
 * consecutive fragments. An empty sequence yields an empty string.
 */
private def concatenateMessages(messageContent: Seq[String]): String = {
  val separator = "\n"
  messageContent.mkString(separator)
}
272354
273- def toOpenAI (usageInfo : UsageInfo ): OpenAIUsageInfo = {
355+ def toOpenAI (
356+ usageInfo : UsageInfo ,
357+ thinkingTokens : Option [Int ] = None
358+ ): OpenAIUsageInfo = {
274359 val promptTokens =
275360 usageInfo.input_tokens +
276361 usageInfo.cache_creation_input_tokens.getOrElse(0 ) +
@@ -286,7 +371,23 @@ package object impl extends AnthropicServiceConsts {
286371 audio_tokens = None
287372 )
288373 ),
289- completion_tokens_details = None
374+ completion_tokens_details = thinkingTokens.map { tokens =>
375+ CompletionTokenDetails (
376+ reasoning_tokens = tokens,
377+ accepted_prediction_tokens = None ,
378+ rejected_prediction_tokens = None
379+ )
380+ }
290381 )
291382 }
383+
/**
 * Rough token estimate for a piece of text.
 *
 * Uses the heuristic of one token per four characters (integer division of `text.length`),
 * which is an average for English text, and never reports fewer than 1 token — even for an
 * empty string. This is only an approximation; accurate counts require a proper tokenizer.
 *
 * @param text
 *   The text whose token count is estimated
 * @return
 *   `text.length / 4`, floored at 1
 */
// TODO: use a proper tokenizer for accurate counts
private def estimateTokenCount(text: String): Int = {
  val approxTokens = text.length / 4
  if (approxTokens < 1) 1 else approxTokens
}
292393}
0 commit comments