@@ -18,6 +18,7 @@ package googlegenai
18
18
19
19
import (
20
20
"context"
21
+ "encoding/base64"
21
22
"encoding/json"
22
23
"fmt"
23
24
"net/http"
63
64
64
65
// Attribution header
65
66
xGoogApiClientHeader = http .CanonicalHeaderKey ("x-goog-api-client" )
66
- GenkitClientHeader = http.Header {
67
+ genkitClientHeader = http.Header {
67
68
xGoogApiClientHeader : {fmt .Sprintf ("genkit-go/%s" , internal .Version )},
68
69
}
69
70
)
@@ -174,6 +175,15 @@ type SafetySetting struct {
174
175
Threshold HarmBlockThreshold `json:"threshold,omitempty"`
175
176
}
176
177
178
// Modality names a kind of content that can be requested in a model response.
// It is the element type of GeminiConfig.ResponseModalities.
type Modality string

// Response modalities that can be requested from a model.
const (
	// ImageMode indicates the model should return images.
	ImageMode Modality = "IMAGE"
	// TextMode indicates the model should return text.
	TextMode Modality = "TEXT"
)
186
+
177
187
// GeminiConfig mirrors GenerateContentConfig without direct genai dependency
178
188
type GeminiConfig struct {
179
189
// MaxOutputTokens is the maximum number of tokens to generate.
@@ -192,6 +202,8 @@ type GeminiConfig struct {
192
202
SafetySettings []* SafetySetting `json:"safetySettings,omitempty"`
193
203
// CodeExecution is whether to allow executing of code generated by the model.
194
204
CodeExecution bool `json:"codeExecution,omitempty"`
205
+ // Response modalities for returned model messages
206
+ ResponseModalities []Modality `json:"responseModalities,omitempty"`
195
207
}
196
208
197
209
// configFromRequest converts any supported config type to [GeminiConfig].
@@ -333,6 +345,23 @@ func generate(
333
345
return nil , err
334
346
}
335
347
348
+ if len (config .ResponseModalities ) > 0 {
349
+ err := validateResponseModalities (model , config .ResponseModalities )
350
+ if err != nil {
351
+ return nil , err
352
+ }
353
+ for _ , m := range config .ResponseModalities {
354
+ gcc .ResponseModalities = append (gcc .ResponseModalities , string (m ))
355
+ }
356
+
357
+ // prevent an error in the client where:
358
+ // if TEXT modality is not present and the model supports it, the client
359
+ // will return an error
360
+ if ! slices .Contains (gcc .ResponseModalities , string (genai .ModalityText )) {
361
+ gcc .ResponseModalities = append (gcc .ResponseModalities , string (genai .ModalityText ))
362
+ }
363
+ }
364
+
336
365
var contents []* genai.Content
337
366
for _ , m := range input .Messages {
338
367
// system parts are handled separately
@@ -523,6 +552,23 @@ func convertRequest(input *ai.ModelRequest, cache *genai.CachedContent) (*genai.
523
552
return & gcc , nil
524
553
}
525
554
555
+ // validateResponseModalities checks if response modality is valid for the requested model
556
+ func validateResponseModalities (model string , modalities []Modality ) error {
557
+ for _ , m := range modalities {
558
+ switch m {
559
+ case ImageMode :
560
+ if ! slices .Contains (imageGenModels , model ) {
561
+ return fmt .Errorf ("IMAGE response modality is not supported for model %q" , model )
562
+ }
563
+ case TextMode :
564
+ continue
565
+ default :
566
+ return fmt .Errorf ("unknown response modality provided: %q" , m )
567
+ }
568
+ }
569
+ return nil
570
+ }
571
+
526
572
// toGeminiTools translates a slice of [ai.ToolDefinition] to a slice of [genai.Tool].
527
573
func toGeminiTools (inTools []* ai.ToolDefinition ) ([]* genai.Tool , error ) {
528
574
var outTools []* genai.Tool
@@ -724,7 +770,11 @@ func translateCandidate(cand *genai.Candidate) *ai.ModelResponse {
724
770
}
725
771
if part .InlineData != nil {
726
772
partFound ++
727
- p = ai .NewMediaPart (part .InlineData .MIMEType , string (part .InlineData .Data ))
773
+ p = ai .NewMediaPart (part .InlineData .MIMEType , base64 .StdEncoding .EncodeToString (part .InlineData .Data ))
774
+ }
775
+ if part .FileData != nil {
776
+ partFound ++
777
+ p = ai .NewMediaPart (part .FileData .MIMEType , part .FileData .FileURI )
728
778
}
729
779
if part .FunctionCall != nil {
730
780
partFound ++
0 commit comments