Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle extended thinking with Anthropic provider, process streaming thinking blocks and show them in the output tab #4426

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
55 changes: 32 additions & 23 deletions core/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ export interface IndexingProgressUpdate {
desc: string;
shouldClearIndexes?: boolean;
status:
| "loading"
| "indexing"
| "done"
| "failed"
| "paused"
| "disabled"
| "cancelled";
| "loading"
| "indexing"
| "done"
| "failed"
| "paused"
| "disabled"
| "cancelled";
debugInfo?: string;
}

Expand Down Expand Up @@ -312,7 +312,7 @@ export interface CompletionOptions extends BaseCompletionOptions {
model: string;
}

export type ChatMessageRole = "user" | "assistant" | "system" | "tool";
export type ChatMessageRole = "user" | "assistant" | "thinking" | "system" | "tool";

export type TextMessagePart = {
type: "text";
Expand Down Expand Up @@ -357,6 +357,14 @@ export interface UserChatMessage {
content: MessageContent;
}

/**
 * Chat message that carries a model's extended-thinking output, kept separate
 * from the regular "assistant" text so it can be accumulated and replayed
 * on its own.
 */
export interface ThinkingChatMessage {
role: "thinking";
/**
 * The thinking text. The Anthropic provider streams it from `thinking_delta`
 * events and yields an empty string on chunks that only carry a signature or
 * redacted data.
 */
content: MessageContent;
/**
 * Signature for the thinking block. The Anthropic provider fills it from
 * `signature_delta` stream events and sends it back as the `signature` of a
 * `thinking` content block.
 */
signature?: string;
/**
 * Payload of a `redacted_thinking` content block. When set, the Anthropic
 * provider replays this message as a `redacted_thinking` block (using this
 * value as `data`) instead of a `thinking` block.
 */
redactedThinking?: string;
/** NOTE(review): no code visible in this change populates this for thinking messages; confirm it is needed. */
toolCalls?: ToolCallDelta[];
}

export interface AssistantChatMessage {
role: "assistant";
content: MessageContent;
Expand All @@ -371,6 +379,7 @@ export interface SystemChatMessage {
/**
 * Union of the chat message variants that can appear in a conversation,
 * including the "thinking" variant used for extended-thinking output.
 */
export type ChatMessage =
| UserChatMessage
| AssistantChatMessage
| ThinkingChatMessage
| SystemChatMessage
| ToolResultChatMessage;

Expand Down Expand Up @@ -679,10 +688,10 @@ export interface IDE {
getCurrentFile(): Promise<
| undefined
| {
isUntitled: boolean;
path: string;
contents: string;
}
isUntitled: boolean;
path: string;
contents: string;
}
>;

getLastFileSaveTimestamp?(): number;
Expand Down Expand Up @@ -866,11 +875,11 @@ export interface CustomCommand {
export interface Prediction {
type: "content";
content:
| string
| {
type: "text";
text: string;
}[];
| string
| {
type: "text";
text: string;
}[];
}

export interface ToolExtras {
Expand Down Expand Up @@ -1208,9 +1217,9 @@ export interface Config {
embeddingsProvider?: EmbeddingsProviderDescription | ILLM;
/** The model that Continue will use for tab autocompletions. */
tabAutocompleteModel?:
| CustomLLM
| ModelDescription
| (CustomLLM | ModelDescription)[];
| CustomLLM
| ModelDescription
| (CustomLLM | ModelDescription)[];
/** Options for tab autocomplete */
tabAutocompleteOptions?: Partial<TabAutocompleteOptions>;
/** UI styles customization */
Expand Down Expand Up @@ -1302,9 +1311,9 @@ export type PackageDetailsSuccess = PackageDetails & {
export type PackageDocsResult = {
packageInfo: ParsedPackageInfo;
} & (
| { error: string; details?: never }
| { details: PackageDetailsSuccess; error?: never }
);
| { error: string; details?: never }
| { details: PackageDetailsSuccess; error?: never }
);

export interface TerminalOptions {
reuseTerminal?: boolean;
Expand Down
12 changes: 7 additions & 5 deletions core/llm/countTokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ class LlamaEncoding implements Encoding {
}

class NonWorkerAsyncEncoder implements AsyncEncoder {
constructor(private readonly encoding: Encoding) {}
constructor(private readonly encoding: Encoding) { }

async close(): Promise<void> {}
async close(): Promise<void> { }

async encode(text: string): Promise<number[]> {
return this.encoding.encode(text);
Expand Down Expand Up @@ -366,6 +366,7 @@ function chatMessageIsEmpty(message: ChatMessage): boolean {
message.content.trim() === "" &&
!message.toolCalls
);
case "thinking":
case "tool":
return false;
}
Expand All @@ -383,8 +384,8 @@ function compileChatMessages(
): ChatMessage[] {
let msgsCopy = msgs
? msgs
.map((msg) => ({ ...msg }))
.filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
.map((msg) => ({ ...msg }))
.filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
: [];

msgsCopy = addSpaceToAnyEmptyMessages(msgsCopy);
Expand Down Expand Up @@ -469,5 +470,6 @@ export {
pruneLinesFromTop,
pruneRawPromptFromTop,
pruneStringFromBottom,
pruneStringFromTop,
pruneStringFromTop
};

39 changes: 30 additions & 9 deletions core/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,11 @@ export abstract class BaseLLM implements ILLM {
options.completionOptions?.maxTokens ??
(llmInfo?.maxCompletionTokens
? Math.min(
llmInfo.maxCompletionTokens,
// Even if the model has a large maxTokens, we don't want to use that every time,
// because it takes away from the context length
this.contextLength / 4,
)
llmInfo.maxCompletionTokens,
// Even if the model has a large maxTokens, we don't want to use that every time,
// because it takes away from the context length
this.contextLength / 4,
)
: DEFAULT_MAX_TOKENS),
};
this.requestOptions = options.requestOptions;
Expand Down Expand Up @@ -780,6 +780,7 @@ export abstract class BaseLLM implements ILLM {
}
}

let thinking = "";
let completion = "";
let citations: null | string[] = null

Expand Down Expand Up @@ -834,8 +835,16 @@ export abstract class BaseLLM implements ILLM {
signal,
completionOptions,
)) {
completion += chunk.content;
yield chunk;

if (chunk.role === "assistant") {
completion += chunk.content;
yield chunk;
}

if (chunk.role === "thinking") {
thinking += chunk.content;
yield chunk;
}
}
}
}
Expand All @@ -847,6 +856,18 @@ export abstract class BaseLLM implements ILLM {
this._logTokensGenerated(completionOptions.model, prompt, completion);

if (logEnabled && this.writeLog) {
if (thinking) {
await this.writeLog(`Thinking:\n${thinking}\n\n`);
}
/*
TODO: According to https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking,
during tool use you must pass thinking and redacted_thinking blocks back to the API,
and each block must be included complete and unmodified. This is critical
for maintaining the model's reasoning flow and conversation integrity.

On the other hand, thinking and redacted_thinking blocks are ignored on subsequent
requests when tools are not in use, so the simplest option is to always add them to the history.
*/
await this.writeLog(`Completion:\n${completion}\n\n`);

if (citations) {
Expand Down Expand Up @@ -920,15 +941,15 @@ export abstract class BaseLLM implements ILLM {
);
}

protected async *_streamComplete(
protected async * _streamComplete(
prompt: string,
signal: AbortSignal,
options: CompletionOptions,
): AsyncGenerator<string> {
throw new Error("Not implemented");
}

protected async *_streamChat(
protected async * _streamChat(
messages: ChatMessage[],
signal: AbortSignal,
options: CompletionOptions,
Expand Down
46 changes: 37 additions & 9 deletions core/llm/llms/Anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ class Anthropic extends BaseLLM {
})),
tool_choice: options.toolChoice
? {
type: "tool",
name: options.toolChoice.function.name,
}
type: "tool",
name: options.toolChoice.function.name,
}
: undefined,
};

Expand Down Expand Up @@ -63,6 +63,23 @@ class Anthropic extends BaseLLM {
input: JSON.parse(toolCall.function?.arguments || "{}"),
})),
};
} else if (message.role === "thinking" && !message.redactedThinking) {
return {
role: "assistant",
content: [{
type: "thinking",
thinking: message.content,
signature: message.signature
}]
};
} else if (message.role === "thinking" && message.redactedThinking) {
return {
role: "assistant",
content: [{
type: "redacted_thinking",
data: message.redactedThinking
}]
};
}

if (typeof message.content === "string") {
Expand Down Expand Up @@ -174,12 +191,12 @@ class Anthropic extends BaseLLM {
messages: msgs,
system: shouldCacheSystemMessage
? [
{
type: "text",
text: this.systemMessage,
cache_control: { type: "ephemeral" },
},
]
{
type: "text",
text: this.systemMessage,
cache_control: { type: "ephemeral" },
},
]
: systemMessage,
}),
signal,
Expand Down Expand Up @@ -216,13 +233,24 @@ class Anthropic extends BaseLLM {
lastToolUseId = value.content_block.id;
lastToolUseName = value.content_block.name;
}
// handle redacted thinking
if (value.content_block.type === "redacted_thinking") {
console.log("redacted thinking", value.content_block.data);
yield { role: "thinking", content: "", redactedThinking: value.content_block.data };
}
break;
case "content_block_delta":
// https://docs.anthropic.com/en/api/messages-streaming#delta-types
switch (value.delta.type) {
case "text_delta":
yield { role: "assistant", content: value.delta.text };
break;
case "thinking_delta":
yield { role: "thinking", content: value.delta.thinking };
break;
case "signature_delta":
yield { role: "thinking", content: "", signature: value.delta.signature };
break;
case "input_json_delta":
if (!lastToolUseId || !lastToolUseName) {
throw new Error("No tool use found");
Expand Down
Loading
Loading