Commit 2b4a909
Thinking support for Gemini and mapping/conversion of OpenAI's reasoning_effort (configurable via a config file)
1 parent cfff413 commit 2b4a909

6 files changed: +192 -31 lines

google-gemini-client/src/main/scala/io/cequence/openaiscala/gemini/JsonFormats.scala

Lines changed: 33 additions & 12 deletions

@@ -1,5 +1,6 @@
 package io.cequence.openaiscala.gemini
 
+import com.typesafe.scalalogging.Logger
 import io.cequence.openaiscala.OpenAIScalaClientException
 import io.cequence.openaiscala.gemini.domain.Expiration.{ExpireTime, TTL}
 import io.cequence.openaiscala.gemini.domain.response._
@@ -11,11 +12,14 @@ import io.cequence.wsclient.JsonUtil
 import io.cequence.wsclient.JsonUtil.enumFormat
 import play.api.libs.functional.syntax._
 import play.api.libs.json._
+import org.slf4j.LoggerFactory
 
 object JsonFormats extends JsonFormats
 
 trait JsonFormats {
 
+  private val logger: Logger = Logger(LoggerFactory.getLogger(this.getClass))
+
   // Content and Parts
   implicit val chatRoleFormat: Format[ChatRole] = enumFormat(ChatRole.values: _*)
 
@@ -60,19 +64,32 @@
   }
 
   implicit val partReads: Reads[Part] = { (json: JsValue) =>
-    json.validate[JsObject].map { (jsonObject: JsObject) =>
-      assert(jsonObject.fields.size == 1)
-      val (prefixFieldName, prefixJson) = jsonObject.fields.head
+    json.validate[JsObject].flatMap { (jsonObject: JsObject) =>
+      // Filter out the thoughtSignature field if present (used for thinking blocks)
+      val fields = jsonObject.fields.filterNot(_._1 == "thoughtSignature")
 
-      PartPrefix.of(prefixFieldName) match {
-        case PartPrefix.text => json.as[Part.Text]
-        case PartPrefix.inlineData => prefixJson.as[Part.InlineData]
-        case PartPrefix.functionCall => prefixJson.as[Part.FunctionCall]
-        case PartPrefix.functionResponse => prefixJson.as[Part.FunctionResponse]
-        case PartPrefix.fileData => prefixJson.as[Part.FileData]
-        case PartPrefix.executableCode => prefixJson.as[Part.ExecutableCode]
-        case PartPrefix.codeExecutionResult => prefixJson.as[Part.CodeExecutionResult]
-        case _ => throw new OpenAIScalaClientException(s"Unknown part type: $prefixFieldName")
+      if (fields.isEmpty) {
+        JsError("Part object has no fields after filtering thoughtSignature")
+      } else {
+        // Warn if there are multiple fields (unexpected structure)
+        if (fields.size > 1) {
+          logger.warn(
+            s"Part object has ${fields.size} fields (expected 1): ${fields.map(_._1).mkString(", ")}. Using first field."
+          )
+        }
+
+        val (prefixFieldName, prefixJson) = fields.head
+
+        PartPrefix.of(prefixFieldName) match {
+          case PartPrefix.text => json.validate[Part.Text]
+          case PartPrefix.inlineData => prefixJson.validate[Part.InlineData]
+          case PartPrefix.functionCall => prefixJson.validate[Part.FunctionCall]
+          case PartPrefix.functionResponse => prefixJson.validate[Part.FunctionResponse]
+          case PartPrefix.fileData => prefixJson.validate[Part.FileData]
+          case PartPrefix.executableCode => prefixJson.validate[Part.ExecutableCode]
+          case PartPrefix.codeExecutionResult => prefixJson.validate[Part.CodeExecutionResult]
+          case _ => JsError(s"Unknown part type: $prefixFieldName")
+        }
       }
     }
  }
@@ -219,6 +236,10 @@ trait JsonFormats {
     Format(speechConfigReads, speechConfigWrites)
 
   implicit val modalityFormat: Format[Modality] = enumFormat(Modality.values: _*)
+  implicit val thinkingLevelFormat: Format[ThinkingLevel] = enumFormat(
+    ThinkingLevel.values: _*
+  )
+  implicit val thinkingConfigFormat: Format[ThinkingConfig] = Json.format[ThinkingConfig]
   implicit val generationConfigFormat: Format[GenerationConfig] = Json.format[GenerationConfig]
 
   // Grounding Attribution and Metadata
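
For illustration, a minimal sketch of what the reworked partReads now tolerates: a part carrying a thoughtSignature next to its payload field (the payload and signature values here are made up):

import io.cequence.openaiscala.gemini.JsonFormats._
import io.cequence.openaiscala.gemini.domain.Part
import play.api.libs.json.{JsSuccess, Json}

val partJson = Json.parse(
  """{"text": "The answer is 42.", "thoughtSignature": "abc123"}"""
)

// thoughtSignature is filtered out before dispatching on the single remaining
// field, so this yields JsSuccess(Part.Text(...)) where the old reader's
// assert(fields.size == 1) would have thrown
partJson.validate[Part] match {
  case JsSuccess(part, _) => println(part)
  case error              => println(error)
}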

google-gemini-client/src/main/scala/io/cequence/openaiscala/gemini/domain/response/GenerateContentResponse.scala

Lines changed: 3 additions & 0 deletions

@@ -225,6 +225,8 @@ case class CitationSource(
  *   Total number of tokens across all the generated response candidates.
  * @param totalTokenCount
  *   Total token count for the generation request (prompt + response candidates).
+ * @param thoughtsTokenCount
+ *   Optional. Number of tokens used for thinking/reasoning when thinkingConfig is enabled.
  * @param promptTokensDetails
  *   Output only. List of modalities that were processed in the request input.
  * @param cacheTokensDetails
@@ -237,6 +239,7 @@ case class UsageMetadata(
   cachedContentTokenCount: Option[Int] = None,
   candidatesTokenCount: Option[Int] = None,
   totalTokenCount: Int,
+  thoughtsTokenCount: Option[Int] = None,
   promptTokensDetails: Seq[ModalityTokenCount] = Nil,
   cacheTokensDetails: Seq[ModalityTokenCount] = Nil,
   candidatesTokensDetails: Seq[ModalityTokenCount] = Nil
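
Downstream, the new field surfaces like this (a sketch; it assumes a GenerateContentResponse value `response` exposing usageMetadata as an Option, which is not shown in this diff):

// Inspect how many tokens went into hidden reasoning vs. the request as a whole
response.usageMetadata.foreach { usage =>
  val thinking = usage.thoughtsTokenCount.getOrElse(0)
  println(s"thinking tokens: $thinking of ${usage.totalTokenCount} total")
}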

google-gemini-client/src/main/scala/io/cequence/openaiscala/gemini/domain/settings/GenerationConfig.scala

Lines changed: 22 additions & 2 deletions

@@ -1,6 +1,6 @@
 package io.cequence.openaiscala.gemini.domain.settings
 
-import io.cequence.openaiscala.gemini.domain.{Modality, Schema}
+import io.cequence.openaiscala.gemini.domain.{Modality, Schema, ThinkingLevel}
 
 case class GenerationConfig(
   stopSequences: Option[Seq[String]] = None,
@@ -18,7 +18,8 @@ case class GenerationConfig(
   responseLogprobs: Option[Boolean] = None,
   logprobs: Option[Int] = None,
   enableEnhancedCivicAnswers: Option[Boolean] = None,
-  speechConfig: Option[SpeechConfig] = None
+  speechConfig: Option[SpeechConfig] = None,
+  thinkingConfig: Option[ThinkingConfig] = None
 )
 
 sealed trait SpeechConfig
@@ -30,3 +31,22 @@ object SpeechConfig {
 }
 
 case class PrebuiltVoiceConfig(voiceName: String)
+
+/**
+ * Config for thinking features.
+ *
+ * @param includeThoughts
+ *   Indicates whether to include thoughts in the response. If true, thoughts are returned
+ *   only when available.
+ * @param thinkingBudget
+ *   The number of thought tokens that the model should generate. Value between 128 and
+ *   32768.
+ * @param thinkingLevel
+ *   Controls the maximum depth of the model's internal reasoning process before it produces
+ *   a response. If not specified, the default is HIGH. Recommended for Gemini 3 or later
+ *   models; using it with earlier models results in an error.
+ */
+case class ThinkingConfig(
+  includeThoughts: Option[Boolean] = None,
+  thinkingBudget: Option[Int] = None,
+  thinkingLevel: Option[ThinkingLevel] = None
+)
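
The two control styles side by side, as an illustrative sketch (ThinkingLevel.LOW is an assumed enum value; only HIGH is confirmed by the scaladoc above):

import io.cequence.openaiscala.gemini.domain.ThinkingLevel
import io.cequence.openaiscala.gemini.domain.settings.{GenerationConfig, ThinkingConfig}

// Gemini 2.5 style: cap hidden reasoning with an explicit token budget
val budgetBased = GenerationConfig(
  thinkingConfig = Some(
    ThinkingConfig(
      includeThoughts = Some(false),
      thinkingBudget = Some(4096) // within the documented 128-32768 range
    )
  )
)

// Gemini 3 style: pick a qualitative reasoning depth instead of a budget
val levelBased = GenerationConfig(
  thinkingConfig = Some(
    ThinkingConfig(thinkingLevel = Some(ThinkingLevel.LOW))
  )
)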

google-gemini-client/src/main/scala/io/cequence/openaiscala/gemini/service/impl/OpenAIGeminiChatCompletionService.scala

Lines changed: 81 additions & 7 deletions

@@ -10,15 +10,18 @@ import io.cequence.openaiscala.domain.response.{
   ChatCompletionChunkResponse,
   ChatCompletionResponse,
   ChunkMessageSpec,
+  CompletionTokenDetails,
   PromptTokensDetails,
   UsageInfo => OpenAIUsageInfo
 }
-import io.cequence.openaiscala.domain.settings.CreateChatCompletionSettings
+import io.cequence.openaiscala.domain.settings.{CreateChatCompletionSettings, ReasoningEffort}
 import io.cequence.openaiscala.domain.{
   AssistantMessage,
   BaseMessage,
   DeveloperMessage,
   ImageURLContent,
+  JsonSchema,
+  NonOpenAIModelId,
   SystemMessage,
   TextContent,
   UserMessage,
@@ -31,18 +34,19 @@ import io.cequence.openaiscala.gemini.domain.response.{GenerateContentResponse,
 import io.cequence.openaiscala.gemini.domain.settings.CreateChatCompletionSettingsOps._
 import io.cequence.openaiscala.gemini.domain.settings.{
   GenerateContentSettings,
-  GenerationConfig
+  GenerationConfig,
+  ThinkingConfig
 }
 import io.cequence.openaiscala.gemini.domain.{CachedContent, ChatRole, Content, Part}
 import io.cequence.openaiscala.gemini.service.GeminiService
 import io.cequence.openaiscala.service.{
+  HasOpenAIConfig,
   OpenAIChatCompletionService,
   OpenAIChatCompletionStreamedServiceExtra
 }
 
 import scala.concurrent.{ExecutionContext, Future}
 import io.cequence.openaiscala.domain.settings.ChatCompletionResponseFormatType
-import io.cequence.openaiscala.domain.JsonSchema
 import io.cequence.openaiscala.gemini.domain.Schema
 import com.typesafe.scalalogging.Logger
 import io.cequence.openaiscala.gemini.domain.SchemaType
@@ -55,7 +59,8 @@ private[service] class OpenAIGeminiChatCompletionService(
 )(
   implicit executionContext: ExecutionContext
 ) extends OpenAIChatCompletionService
-    with OpenAIChatCompletionStreamedServiceExtra {
+    with OpenAIChatCompletionStreamedServiceExtra
+    with HasOpenAIConfig {
 
   protected val logger: Logger = Logger(LoggerFactory.getLogger(this.getClass))
 
@@ -266,7 +271,11 @@
           responseLogprobs = settings.logprobs,
           logprobs = settings.top_logprobs,
           enableEnhancedCivicAnswers = None,
-          speechConfig = None
+          speechConfig = None,
+          thinkingConfig = toThinkingConfig(
+            settings.model,
+            settings.reasoning_effort
+          )
         )
       ),
       cachedContent = None
@@ -292,7 +301,7 @@
       logger.warn(s"OpenAI param '$fieldName' is not yet supported by Gemini. Skipping...")
     }
 
-    notSupported(_.reasoning_effort, "reasoning_effort")
+    // reasoning_effort is now supported via thinkingConfig conversion
     notSupported(_.service_tier, "service_tier")
     notSupported(_.parallel_tool_calls, "parallel_tool_calls")
     notSupportedCollection(_.metadata, "metadata")
@@ -301,6 +310,64 @@
     notSupported(_.store, "store")
   }
 
+  /**
+   * Converts OpenAI's reasoning_effort to Gemini's ThinkingConfig using the configured
+   * mapping.
+   *
+   * @param model
+   *   The target (Gemini) model name, used to clamp out-of-range budgets
+   * @param reasoningEffort
+   *   The reasoning effort level from OpenAI settings
+   * @return
+   *   ThinkingConfig with an appropriate thinkingBudget, or None if reasoning_effort is None
+   *   or no mapping is found
+   */
+  private def toThinkingConfig(
+    model: String,
+    reasoningEffort: Option[ReasoningEffort]
+  ): Option[ThinkingConfig] = {
+    import io.cequence.wsclient.ConfigImplicits._
+
+    reasoningEffort.flatMap { effort =>
+      val effortKey = effort.toString.toLowerCase
+      val configPath =
+        s"$configPrefix.reasoning-effort-thinking-budget-mapping.$effortKey.gemini"
+
+      clientConfig
+        .optionalInt(configPath)
+        .flatMap { budget =>
+          logger.debug(
+            s"Converting reasoning effort '$effortKey' to thinking budget: $budget"
+          )
+
+          // budget = 0 has a model-specific meaning: for the 2.5/3 Pro models, 0 is out of
+          // range (the minimum is 128), so thinking cannot be turned off completely there;
+          // we clamp to the minimal budget of 128 instead
+          val budgetFinal =
+            if (
+              budget == 0 && (
+                model.startsWith(NonOpenAIModelId.gemini_3_pro) ||
+                model.startsWith(NonOpenAIModelId.gemini_2_5_pro)
+              )
+            )
+              128
+            else
+              budget
+
+          Some(
+            ThinkingConfig(
+              includeThoughts = Some(false), // typically don't include thoughts in response
+              thinkingBudget = Some(budgetFinal),
+              thinkingLevel = None // let budget control the thinking depth
+            )
+          )
+        }
+        .orElse {
+          logger.warn(
+            s"No thinking budget mapping found for reasoning effort '$effortKey' in config path: $configPath"
+          )
+          None
+        }
+    }
+  }
+
   private def toGeminiJSONSchema(
     jsonSchema: JsonSchema
   ): Schema = jsonSchema match {
@@ -454,7 +521,14 @@
         cached_tokens = usageMetadata.cachedContentTokenCount.getOrElse(0),
         audio_tokens = None
       )
-    )
+    ),
+    completion_tokens_details = usageMetadata.thoughtsTokenCount.map { thinkingTokens =>
+      CompletionTokenDetails(
+        reasoning_tokens = thinkingTokens,
+        accepted_prediction_tokens = None,
+        rejected_prediction_tokens = None
+      )
+    }
   )
 
   /**
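
An end-to-end sketch of the new path. Hedged: the ReasoningEffort case name, the model-id constant, and the factory that produces `geminiService` are assumed, not shown in this diff:

import scala.concurrent.ExecutionContext.Implicits.global
import io.cequence.openaiscala.domain.{NonOpenAIModelId, UserMessage}
import io.cequence.openaiscala.domain.settings.{CreateChatCompletionSettings, ReasoningEffort}

// `geminiService` stands for an OpenAIChatCompletionService backed by Gemini,
// obtained via the usual factory (assumed)
geminiService
  .createChatCompletion(
    messages = Seq(UserMessage("Prove that the square root of 2 is irrational.")),
    settings = CreateChatCompletionSettings(
      model = NonOpenAIModelId.gemini_2_5_flash, // assumed constant name
      reasoning_effort = Some(ReasoningEffort.medium) // assumed case name; maps to gemini = 4096
    )
  )
  .map { response =>
    // Gemini's thoughtsTokenCount comes back as OpenAI-style reasoning_tokens
    val reasoningTokens =
      response.usage.flatMap(_.completion_tokens_details).map(_.reasoning_tokens)
    println(s"reasoning tokens: $reasoningTokens")
  }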

openai-client/src/main/resources/openai-scala-client.conf

Lines changed: 51 additions & 8 deletions

@@ -66,7 +66,9 @@ openai-scala-client {
       # Moonshot AI
       "moonshotai/kimi-k2-instruct",
       "moonshotai/kimi-k2-instruct-0905",
-      # Gemini 2.5
+      # Gemini 2.5 / 3
+      "gemini-3-pro",
+      "gemini-3-pro-preview",
       "gemini-2.5-pro",
       "gemini-2.5-pro-preview-06-05",
       "gemini-2.5-pro-preview-05-06",
@@ -98,6 +100,14 @@
       "gemini-1.5-pro-latest",
       "gemini-exp-1206",
       # Grok
+      "grok-4-1-fast-reasoning",
+      "grok-4-1-fast-non-reasoning",
+      "grok-4",
+      "grok-4-latest",
+      "grok-4-0709",
+      "grok-4-fast-reasoning",
+      "grok-4-fast-non-reasoning",
+      "grok-code-fast-1",
       "grok-2",
       "grok-2-1212",
       "grok-2-latest",
@@ -112,12 +122,45 @@
       "grok-3-mini-latest",
       "grok-3-mini-fast",
       "grok-3-mini-fast-beta",
-      "grok-3-mini-fast-latest",
-      "grok-4",
-      "grok-4-latest",
-      "grok-4-0709",
-      "grok-4-fast-reasoning",
-      "grok-4-fast-non-reasoning",
-      "grok-code-fast-1"
+      "grok-3-mini-fast-latest"
   ]
+
+  reasoning-effort-thinking-budget-mapping {
+    # No explicit extended thinking
+    none {
+      # Gemini:
+      # - 2.5 Flash / Flash-Lite: 0 = disable thinking
+      # - 2.5 Pro: 0 is out of range (128–32768), so your code should clamp to 128 or skip setting thinking_budget.
+      gemini = 0
+
+      # Anthropic:
+      # 0 = sentinel meaning "don't enable extended thinking at all"
+      # (omit the `thinking` block instead of sending budget_tokens=0).
+      anthropic = 0
+    }
+
+    # Quick replies, simple Q&A, light code edits, low cost/latency
+    minimal {
+      gemini = 256     # above Pro's min of 128
+      anthropic = 1024 # Anthropic min is 1024
+    }
+
+    # Normal app usage, small multi-step reasoning, short RAG answers
+    low {
+      gemini = 1024
+      anthropic = 2048
+    }
+
+    # Heavier reasoning, multi-step analysis, non-trivial debugging, multi-page doc Q&A
+    medium {
+      gemini = 4096
+      anthropic = 4096
+    }
+
+    # Hard problems: complex codebases, intricate math, multi-doc reasoning
+    high {
+      gemini = 8192
+      anthropic = 8192
+    }
+  }
 }
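
Because the mapping is plain client config, applications can override individual budgets through standard Typesafe Config precedence; a sketch with illustrative values:

# application.conf
openai-scala-client {
  reasoning-effort-thinking-budget-mapping {
    low.gemini = 2048   # a more generous "low" budget for Gemini
    high.gemini = 6144  # a thriftier "high" budget
  }
}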

openai-client/src/main/scala/io/cequence/openaiscala/service/OpenAIServiceFactoryHelper.scala

Lines changed: 2 additions & 2 deletions

@@ -9,7 +9,7 @@ import io.cequence.wsclient.service.ws.Timeouts
 
 import scala.concurrent.ExecutionContext
 
-trait OpenAIServiceFactoryHelper[F] extends OpenAIServiceConsts {
+trait OpenAIServiceFactoryHelper[F] extends OpenAIServiceConsts with HasOpenAIConfig {
 
   def apply(
     apiKey: String,
@@ -33,7 +33,7 @@ trait OpenAIServiceFactoryHelper[F] extends OpenAIServiceConsts {
     implicit ec: ExecutionContext,
     materializer: Materializer
   ): F =
-    apply(loadDefaultConfig)
+    apply(clientConfig)
 
   def apply(
     config: Config
