1 change: 1 addition & 0 deletions NEWS.md
@@ -1,5 +1,6 @@
# ellmer (development version)

* `params()` gains new `reasoning_effort` and `reasoning_tokens` parameters so you can control the amount of effort a model spends on thinking. Initial support is provided for `chat_claude()`, `chat_google_gemini()`, and `chat_openai()` (#720).
* `chat_anthropic()` gains a new `cache` parameter to control caching. By default it is set to "5m", which should (on average) reduce the cost of your chats (#584).
* `chat_openai_responses()` gains a `service_tier` argument (#712).
* `Chat$get_tokens()` now also returns the cost, and returns one row for each assistant turn, better representing the underlying data received from LLM APIs. Similarly, the `print()` method now reports costs on each assistant turn, rather than trying to parse out individual costs.
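For readers of the NEWS entry above, a minimal usage sketch of the new parameters (not part of the diff; the model names are placeholders and assume reasoning-capable models):

library(ellmer)

# Providers that take an effort level (e.g. OpenAI) use `reasoning_effort`
chat <- chat_openai(
  model = "o4-mini",  # placeholder; any reasoning-capable model
  params = params(reasoning_effort = "high")
)

# Providers that take a token budget (e.g. Claude, Gemini) use `reasoning_tokens`
chat <- chat_claude(
  params = params(reasoning_tokens = 2048)
)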
10 changes: 10 additions & 0 deletions R/params.R
@@ -18,6 +18,10 @@
#' @param max_tokens Maximum number of tokens to generate.
#' @param log_probs Include the log probabilities in the output?
#' @param stop_sequences A character vector of tokens to stop generation on.
#' @param reasoning_effort,reasoning_tokens How much effort should the model
#' spend on thinking? `reasoning_effort` is a string, like "low", "medium", or "high".
#' `reasoning_tokens` is an integer, giving a maximum token budget.
#' Each provider accepts only one of these two parameters.
#' @param ... Additional named parameters to send to the provider.
#' @export
params <- function(
@@ -30,6 +34,8 @@ params <- function(
max_tokens = NULL,
log_probs = NULL,
stop_sequences = NULL,
reasoning_effort = NULL,
reasoning_tokens = NULL,
...
) {
check_number_decimal(temperature, allow_null = TRUE, min = 0)
@@ -41,6 +47,8 @@ params <- function(
check_number_whole(max_tokens, allow_null = TRUE, min = 1)
check_bool(log_probs, allow_null = TRUE)
check_character(stop_sequences, allow_null = TRUE)
check_string(reasoning_effort, allow_null = TRUE)
check_number_whole(reasoning_tokens, min = 0, allow_null = TRUE)

compact(list2(
temperature = temperature,
@@ -52,6 +60,8 @@ params <- function(
max_tokens = max_tokens,
log_probs = log_probs,
stop_sequences = stop_sequences,
reasoning_effort = reasoning_effort,
reasoning_tokens = reasoning_tokens,
extra_args = list2(...)
))
}
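A small sketch of what the updated `params()` now returns: `compact()` drops the `NULL` entries, so only the arguments you actually set are forwarded to the provider (output shown is approximate):

p <- params(temperature = 0.7, reasoning_tokens = 1024)
# roughly list(temperature = 0.7, reasoning_tokens = 1024)

# Validation happens up front; for example, a negative budget errors immediately
# because check_number_whole(reasoning_tokens, min = 0, allow_null = TRUE) rejects it:
# params(reasoning_tokens = -5)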
15 changes: 13 additions & 2 deletions R/provider-anthropic.R
@@ -214,6 +214,15 @@ method(chat_body, ProviderAnthropic) <- function(
tools <- as_json(provider, unname(tools))

params <- chat_params(provider, provider@params)
if (has_name(params, "budget_tokens")) {
thinking <- list(
type = "enabled",
budget_tokens = params$budget_tokens
)
params$budget_tokens <- NULL
} else {
thinking <- NULL
}

compact(list2(
model = provider@model,
@@ -222,6 +231,7 @@ method(chat_body, ProviderAnthropic) <- function(
stream = stream,
tools = tools,
tool_choice = tool_choice,
thinking = thinking,
!!!params
))
}
@@ -234,7 +244,8 @@ method(chat_params, ProviderAnthropic) <- function(provider, params) {
top_p = "top_p",
top_k = "top_k",
max_tokens = "max_tokens",
stop_sequences = "stop_sequences"
stop_sequences = "stop_sequences",
budget_tokens = "reasoning_tokens"
)
)

@@ -262,7 +273,7 @@ method(stream_parse, ProviderAnthropic) <- function(provider, event) {
}
method(stream_text, ProviderAnthropic) <- function(provider, event) {
if (event$type == "content_block_delta") {
event$delta$text
event$delta$text %||% event$delta$thinking
}
}
method(stream_merge_chunks, ProviderAnthropic) <- function(
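To make the Anthropic wiring concrete, a sketch of the request body that `chat_body()` now assembles when `reasoning_tokens` is set (values and the model name are made up; unrelated fields are elided):

# chat_params() renames reasoning_tokens -> budget_tokens, and chat_body()
# lifts it into Anthropic's `thinking` block, roughly:
body <- list(
  model = "claude-sonnet-4-20250514",  # placeholder model name
  max_tokens = 8192,
  thinking = list(type = "enabled", budget_tokens = 2048),
  messages = list()  # turns elided
)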
15 changes: 12 additions & 3 deletions R/provider-google.R
@@ -210,6 +210,14 @@ method(chat_body, ProviderGoogleGemini) <- function(
generation_config$response_schema <- as_json(provider, type)
}

if (has_name(generation_config, "thinkingBudget")) {
generation_config$thinkingConfig <- list(
thinkingBudget = generation_config$thinkingBudget,
includeThoughts = TRUE
)
generation_config$thinkingBudget <- NULL
}

contents <- as_json(provider, turns)

# https://ai.google.dev/api/caching#Tool
@@ -240,7 +248,8 @@ method(chat_params, ProviderGoogleGemini) <- function(provider, params) {
seed = "seed",
maxOutputTokens = "max_tokens",
responseLogprobs = "log_probs",
stopSequences = "stop_sequences"
stopSequences = "stop_sequences",
thinkingBudget = "reasoning_tokens"
)
)
}
@@ -274,8 +283,8 @@ method(value_tokens, ProviderGoogleGemini) <- function(provider, json) {
cached <- usage$cachedContentTokenCount %||% 0

tokens(
input = (usage$promptTokenCount %||% 0) - cached,
output = usage$candidatesTokenCount,
input = (usage$promptTokenCount %||% 0) - cached,
output = usage$candidatesTokenCount + (usage$thoughtsTokenCount %||% 0),
cached_input = cached
)
}
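The new Gemini token accounting is easiest to see with made-up numbers; a small sketch, assuming rlang's `%||%` (or base R >= 4.4). The `thinkingConfig` block itself is built exactly as in the hunk above, wrapping the budget with `includeThoughts = TRUE`:

`%||%` <- function(x, y) if (is.null(x)) y else x  # or rlang::`%||%`

usage <- list(
  promptTokenCount = 1200,
  cachedContentTokenCount = 1000,
  candidatesTokenCount = 300,
  thoughtsTokenCount = 450
)

cached <- usage$cachedContentTokenCount %||% 0
input <- (usage$promptTokenCount %||% 0) - cached  # 200 uncached input tokens
output <- usage$candidatesTokenCount + (usage$thoughtsTokenCount %||% 0)  # 750: answer + thinking tokens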
34 changes: 21 additions & 13 deletions R/provider-openai-responses.R
@@ -126,6 +126,16 @@ method(chat_body, ProviderOpenAIResponses) <- function(
# https://platform.openai.com/docs/api-reference/responses/create#responses-create-include
params <- chat_params(provider, provider@params)

if (has_name(params, "reasoning_effort")) {
reasoning <- list(
effort = params$reasoning_effort,
summary = "auto"
)
params$reasoning_effort <- NULL
} else {
reasoning <- NULL
}

include <- c(
if (isTRUE(params$log_probs)) "message.output_text.logprobs",
if (is_openai_reasoning(provider@model)) "reasoning.encrypted_content"
@@ -140,6 +150,7 @@ method(chat_body, ProviderOpenAIResponses) <- function(
stream = stream,
tools = tools,
text = text,
reasoning = reasoning,
store = FALSE,
service_tier = provider@service_tier
))
@@ -155,17 +166,24 @@ method(chat_params, ProviderOpenAIResponses) <- function(provider, params) {
frequency_penalty = "frequency_penalty",
max_tokens = "max_output_tokens",
log_probs = "log_probs",
top_logprobs = "top_k"
top_logprobs = "top_k",
reasoning_effort = "reasoning_effort"
)
)
}

# OpenAI -> ellmer --------------------------------------------------------------

method(stream_text, ProviderOpenAIResponses) <- function(provider, event) {
# https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
if (event$type == "response.output_text.delta") {
# https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
event$delta
} else if (event$type == "response.reasoning_summary_text.delta") {
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
event$delta
} else if (event$type == "response.reasoning_summary_text.done") {
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
"\n\n"
Comment on lines +179 to +186
Collaborator


Looking at this section reminded me that it would be nice if this generic returned Content objects, too. In this case it could return either ContentText() or ContentThinking() objects. This would let us both display streamed thinking tokens differently in the console and handle streaming thinking tokens in shinychat with an entirely different treatment than regular text.

I also recognize that this would probably be best done as a follow up PR rather than here.

(At one point I had thought we'd return ContentDelta* objects here, e.g. ContentDeltaText() or ContentDeltaThinking(), but now that we're here I think it's fine to use the existing types.)

Member Author


Yes, good point. Issue at #828

}
}
method(stream_merge_chunks, ProviderOpenAIResponses) <- function(
@@ -214,17 +232,7 @@ method(value_turn, ProviderOpenAIResponses) <- function(
arguments <- jsonlite::parse_json(output$arguments)
ContentToolRequest(output$id, output$name, arguments)
} else if (output$type == "reasoning") {
# {
# id: str,
# summary: str,
# type: "reasoning",
# content: [
# { text: str, type: "reasoning_text" }
# ],
# encrypted_content: str,
# status: "in_progress" | "completed" | "incomplete"
# }
thinking <- paste0(map_chr(output$content, "[[", "text"), collapse = "")
thinking <- paste0(map_chr(output$summary, "[[", "text"), collapse = "")
ContentThinking(thinking = thinking, extra = output)
} else if (output$type == "image_generation_call") {
mime_type <- switch(
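A sketch of how the reasoning summary is now assembled from `output$summary` rather than `output$content` (the payload is made up; ellmer uses its internal `map_chr()` helper where this sketch uses `vapply()`):

output <- list(
  type = "reasoning",
  summary = list(
    list(type = "summary_text", text = "Compared the two schema options. "),
    list(type = "summary_text", text = "Settled on a single tool call.")
  )
)

thinking <- paste0(
  vapply(output$summary, function(x) x$text, character(1)),
  collapse = ""
)
# "Compared the two schema options. Settled on a single tool call."
# ellmer then wraps this as ContentThinking(thinking = thinking, extra = output)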
7 changes: 7 additions & 0 deletions man/params.Rd

Some generated files are not rendered by default.