@@ -41,29 +41,32 @@ func BuildOllamaClient(ctx context.Context) (*OllamaClient, error) {
 	}
 	klog.Infof("using ollama with base url %v", baseURL.String())
 
+	model := os.Getenv("OLLAMA_MODEL")
+	if model == "" {
+		klog.Fatalf("OLLAMA_MODEL not set")
+	}
+
 	return &OllamaClient{
 		baseURL:    baseURL,
 		httpClient: http.DefaultClient,
+		model:      model,
 	}, nil
 }
 
 type OllamaClient struct {
 	baseURL    *url.URL
 	httpClient *http.Client
+	model      string
 }
 
 func (c *OllamaClient) Close() error {
 	return nil
 }
 
 func (c *OllamaClient) StartChat(systemPrompt string) Chat {
-	session := &chatRequest{}
-
-	model := os.Getenv("OLLAMA_MODEL")
-	if model == "" {
-		klog.Fatalf("OLLAMA_MODEL not set")
+	session := &chatRequest{
+		Model: c.model,
 	}
-	session.Model = model
 
 	// HACK: Setting the system prompt seems to really mess up some ollama models
 	// session.Messages = append(session.Messages, chatMessage{
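
Note: this hunk moves the OLLAMA_MODEL lookup out of StartChat and into BuildOllamaClient, so the model name is resolved once and cached on the client, and a missing variable now aborts at construction time rather than on the first chat. A minimal caller sketch (hypothetical, not part of the diff; assumes it compiles in the same package as BuildOllamaClient):

package main // hypothetical harness; the real package exposes BuildOllamaClient

import (
	"context"
	"log"
)

func main() {
	ctx := context.Background()
	client, err := BuildOllamaClient(ctx) // klog.Fatalf's here if OLLAMA_MODEL is unset
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	chat := client.StartChat("") // session is pre-seeded with the cached model name
	_ = chat
}
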
@@ -114,6 +117,51 @@ type chatResponse struct {
 	EvalDuration int64 `json:"eval_duration"`
 }
 
+type completionRequest struct {
+	// model: (required) the model name
+	Model string `json:"model,omitempty"`
+	// prompt: the prompt to generate a response for
+	Prompt string `json:"prompt,omitempty"`
+
+	// suffix: the text after the model response
+
+	// images: (optional) a list of base64-encoded images (for multimodal models such as llava)
+
+	// format: the format to return a response in. Format can be json or a JSON schema
+
+	// options: additional model parameters listed in the documentation for the Modelfile, such as temperature
+	Options map[string]any `json:"options,omitempty"`
+
+	// system: system message to use (overrides what is defined in the Modelfile)
+
+	// template: the prompt template to use (overrides what is defined in the Modelfile)
+
+	// stream: if false, the response will be returned as a single response object rather than a stream of objects
+	Stream *bool `json:"stream,omitempty"`
+
+	// raw: if true, no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API
+
+	// keep_alive: controls how long the model will stay loaded into memory following the request (default: 5m)
+
+	// context (deprecated): the context parameter returned from a previous request to /generate; this can be used to keep a short conversational memory
+}
+
+type completionResponse struct {
+	Model     string `json:"model"`
+	CreatedAt string `json:"created_at"`
+	Response  string `json:"response"`
+	Done      bool   `json:"done"`
+
+	// "context": [1, 2, 3],
+
+	TotalDuration      int64 `json:"total_duration"`
+	LoadDuration       int64 `json:"load_duration"`
+	PromptEvalCount    int64 `json:"prompt_eval_count"`
+	PromptEvalDuration int64 `json:"prompt_eval_duration"`
+	EvalCount          int64 `json:"eval_count"`
+	EvalDuration       int64 `json:"eval_duration"`
+}
+
 type chatMessage struct {
 	// role: the role of the message, either system, user, assistant, or tool
 	Role string `json:"role,omitempty"`
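
Note: the two types added in the hunk above mirror the request/response wire format of Ollama's /api/generate endpoint, with unused fields kept only as doc comments. As a sanity check, here is roughly what a non-streaming completionRequest marshals to (standalone sketch; the model name and prompt are illustrative, and the type is trimmed to its marshaled fields):

package main

import (
	"encoding/json"
	"fmt"
)

// completionRequest as added in this diff, trimmed to the marshaled fields.
type completionRequest struct {
	Model   string         `json:"model,omitempty"`
	Prompt  string         `json:"prompt,omitempty"`
	Options map[string]any `json:"options,omitempty"`
	Stream  *bool          `json:"stream,omitempty"`
}

func main() {
	stream := false
	req := &completionRequest{
		Model:   "llama3.2",             // illustrative
		Prompt:  "Why is the sky blue?", // illustrative
		Options: map[string]any{"num_ctx": 128 * 1024},
		Stream:  &stream,
	}
	b, _ := json.Marshal(req)
	fmt.Println(string(b))
	// {"model":"llama3.2","prompt":"Why is the sky blue?","options":{"num_ctx":131072},"stream":false}
}
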
@@ -196,7 +244,9 @@ func (c *OllamaChat) SendMessage(ctx context.Context, parts ...string) (Response
 			Role:    "user",
 			Content: part,
 		})
+		klog.Infof("sending user:\n%v", part)
 	}
+
 	ollamaResponse, err := c.client.doChat(ctx, c.session)
 	if err != nil {
 		return nil, err
@@ -213,6 +263,68 @@ func (c *OllamaChat) SendMessage(ctx context.Context, parts ...string) (Response
 	return response, nil
 }
 
+func (c *OllamaClient) GenerateCompletion(ctx context.Context, request *CompletionRequest) (CompletionResponse, error) {
+	ollamaRequest := &completionRequest{
+		Model:  c.model,
+		Prompt: request.Prompt,
+		Options: map[string]any{
+			"num_ctx": 128 * 1024,
+		},
+	}
+
+	ollamaResponse, err := c.doCompletion(ctx, ollamaRequest)
+	if err != nil {
+		return nil, err
+	}
+
+	if ollamaResponse.Response == "" {
+		return nil, fmt.Errorf("no response returned from ollama")
+	}
+
+	response := &OllamaCompletionResponse{ollamaResponse: ollamaResponse}
+	return response, nil
+}
+
+func (c *OllamaClient) doCompletion(ctx context.Context, req *completionRequest) (*completionResponse, error) {
+	stream := false
+	req.Stream = &stream
+
+	body, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("building json body: %w", err)
+	}
+	u := c.baseURL.JoinPath("api", "generate")
+	klog.V(2).Infof("sending POST request to %v: %v", u.String(), string(body))
+	httpRequest, err := http.NewRequestWithContext(ctx, "POST", u.String(), bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("building http request: %w", err)
+	}
+	httpRequest.Header.Set("Content-Type", "application/json")
+
+	httpResponse, err := c.httpClient.Do(httpRequest)
+	if err != nil {
+		return nil, fmt.Errorf("performing http request: %w", err)
+	}
+	defer httpResponse.Body.Close()
+
+	b, err := io.ReadAll(httpResponse.Body)
+	if err != nil {
+		return nil, fmt.Errorf("reading response body: %w", err)
+	}
+
+	klog.Infof("response is: %v", string(b))
+
+	if httpResponse.StatusCode != 200 {
+		return nil, fmt.Errorf("unexpected http status: %q with response %q", httpResponse.Status, string(b))
+	}
+
+	completionResponse := &completionResponse{}
+	if err := json.Unmarshal(b, completionResponse); err != nil {
+		return nil, fmt.Errorf("unmarshalling json response: %w", err)
+	}
+	return completionResponse, nil
+}
+
 func (c *OllamaClient) doChat(ctx context.Context, req *chatRequest) (*chatResponse, error) {
 	stream := false
 	req.Stream = &stream
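
Note: GenerateCompletion above pins num_ctx at a 128k-token context window via Options, and treats an empty Response field as an error even on HTTP 200. A hypothetical end-to-end call (CompletionRequest is defined elsewhere in the repo; only its Prompt field is assumed here, based on the request.Prompt access above, and the same same-package assumption applies):

package main // hypothetical harness

import (
	"context"
	"fmt"
	"log"
)

func main() {
	ctx := context.Background()
	client, err := BuildOllamaClient(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	resp, err := client.GenerateCompletion(ctx, &CompletionRequest{
		Prompt: "Why is the sky blue?", // illustrative prompt
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Response())
}
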
@@ -222,7 +334,7 @@ func (c *OllamaClient) doChat(ctx context.Context, req *chatRequest) (*chatRespo
 		return nil, fmt.Errorf("building json body: %w", err)
 	}
 	u := c.baseURL.JoinPath("api", "chat")
-	klog.Infof("sending POST request to %v: %v", u.String(), string(body))
+	klog.V(2).Infof("sending POST request to %v: %v", u.String(), string(body))
 	httpRequest, err := http.NewRequestWithContext(ctx, "POST", u.String(), bytes.NewReader(body))
 	if err != nil {
 		return nil, fmt.Errorf("building http request: %w", err)
@@ -323,3 +435,17 @@ func (p *OllamaPart) AsFunctionCalls() ([]FunctionCall, bool) {
 	}
 	return functionCalls, true
 }
+
+type OllamaCompletionResponse struct {
+	ollamaResponse *completionResponse
+}
+
+var _ CompletionResponse = &OllamaCompletionResponse{}
+
+func (r *OllamaCompletionResponse) Response() string {
+	return r.ollamaResponse.Response
+}
+
+func (r *OllamaCompletionResponse) UsageMetadata() any {
+	return r.ollamaResponse
+}
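
Note: the assertion var _ CompletionResponse = &OllamaCompletionResponse{} together with the two methods above implies a CompletionResponse interface along these lines (reconstructed from usage in this file; the actual definition lives elsewhere in the repo):

// Reconstructed sketch, not copied from the repo.
type CompletionResponse interface {
	Response() string
	UsageMetadata() any
}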