diff --git a/agents/src/llm/chat_context.ts b/agents/src/llm/chat_context.ts
index 138959490..6f7ae088b 100644
--- a/agents/src/llm/chat_context.ts
+++ b/agents/src/llm/chat_context.ts
@@ -499,6 +499,119 @@ export class ChatContext {
     return await toChatCtx(format, this, injectDummyUserMessage);
   }
 
+  /**
+   * Compare this ChatContext with another for logical equivalence.
+   *
+   * Unlike strict equality, this method:
+   * - Ignores timestamps (`createdAt` fields)
+   * - Ignores data that is not part of the logical content: image cache data and
+   *   raw audio frames (only the audio transcript is compared)
+   * - Focuses on content: compares IDs, types, and payload
+   *
+   * Items are compared pairwise by position, so the same items in a different
+   * order are not equivalent. Item types without a dedicated branch below are
+   * matched on `id` and `type` only.
+   *
+   * This is useful for detecting if the conversation content has changed,
+   * for example when validating preemptive generation results.
+   *
+   * @param other - The ChatContext to compare with
+   * @returns true if both contexts contain the same sequence of items with matching essential fields
+   */
+  isEquivalent(other: ChatContext): boolean {
+    // Same object reference
+    if (this === other) {
+      return true;
+    }
+
+    // Different lengths
+    if (this._items.length !== other._items.length) {
+      return false;
+    }
+
+    // Compare each item pair
+    for (let i = 0; i < this._items.length; i++) {
+      const a = this._items[i]!;
+      const b = other._items[i]!;
+
+      // IDs and types must match
+      if (a.id !== b.id || a.type !== b.type) {
+        return false;
+      }
+
+      // Type-specific field comparison
+      if (a.type === 'message' && b.type === 'message') {
+        // Compare role, content, and interrupted status (not timestamp)
+        if (a.role !== b.role || a.interrupted !== b.interrupted) {
+          return false;
+        }
+
+        // Compare content arrays
+        if (a.content.length !== b.content.length) {
+          return false;
+        }
+
+        for (let j = 0; j < a.content.length; j++) {
+          const ca = a.content[j]!;
+          const cb = b.content[j]!;
+
+          // Both are strings
+          if (typeof ca === 'string' && typeof cb === 'string') {
+            if (ca !== cb) {
+              return false;
+            }
+          }
+          // Both are objects
+          else if (typeof ca === 'object' && typeof cb === 'object') {
+            if (ca.type !== cb.type) {
+              return false;
+            }
+
+            if (ca.type === 'image_content' && cb.type === 'image_content') {
+              // Compare essential image fields (not cache)
+              if (
+                ca.id !== cb.id ||
+                ca.image !== cb.image ||
+                ca.inferenceDetail !== cb.inferenceDetail ||
+                ca.inferenceWidth !== cb.inferenceWidth ||
+                ca.inferenceHeight !== cb.inferenceHeight ||
+                ca.mimeType !== cb.mimeType
+              ) {
+                return false;
+              }
+            } else if (ca.type === 'audio_content' && cb.type === 'audio_content') {
+              // Compare audio transcript (frames comparison would be too expensive)
+              if (ca.transcript !== cb.transcript) {
+                return false;
+              }
+            }
+          }
+          // Mismatched types
+          else {
+            return false;
+          }
+        }
+      } else if (a.type === 'function_call' && b.type === 'function_call') {
+        // Compare name, callId, and args (not timestamp)
+        if (a.name !== b.name || a.callId !== b.callId || a.args !== b.args) {
+          return false;
+        }
+      } else if (a.type === 'function_call_output' && b.type === 'function_call_output') {
+        // Compare name, callId, output, and isError (not timestamp)
+        if (
+          a.name !== b.name ||
+          a.callId !== b.callId ||
+          a.output !== b.output ||
+          a.isError !== b.isError
+        ) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
+
   /**
    * Internal helper used by `truncate` & `addMessage` to find the correct
    * insertion index for a timestamp so the list remains sorted.
diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts
index bc4b1d4a4..612bed173 100644
--- a/plugins/deepgram/src/stt.ts
+++ b/plugins/deepgram/src/stt.ts
@@ -35,6 +35,13 @@ export interface STTOptions {
   dictation: boolean;
   diarize: boolean;
   numerals: boolean;
+  /**
+   * Enable eager end-of-turn detection for preemptive generation.
+   * When set to a value between 0.3 and 0.9, Deepgram will emit EagerEndOfTurn events
+   * when it detects a pause in speech, allowing the agent to start generating responses
+   * preemptively.
+   */
+  eagerEotThreshold?: number;
 }
 
 const defaultSTTOptions: STTOptions = {
@@ -161,6 +168,7 @@ export class SpeechStream extends stt.SpeechStream {
         keyterm: this.#opts.keyterm,
         profanity_filter: this.#opts.profanityFilter,
         language: this.#opts.language,
+        eager_eot_threshold: this.#opts.eagerEotThreshold,
       };
       Object.entries(params).forEach(([k, v]) => {
         if (v !== undefined) {
@@ -326,6 +334,31 @@ export class SpeechStream extends stt.SpeechStream {
 
               break;
             }
+            case 'EagerEndOfTurn': {
+              // Deepgram has detected a pause in speech, but the user is technically
+              // still speaking. Send a preflight event to enable preemptive generation.
+              //
+              // `metadata` is read defensively: if the payload has no `metadata` object,
+              // a plain property access would throw before the transcript is queued.
+              // In that case the previously recorded request id is kept.
+              this.#requestId = json['metadata']?.['request_id'] ?? this.#requestId;
+
+              const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);
+
+              if (alternatives[0] && alternatives[0].text) {
+                this.#logger.debug(
+                  { transcript: alternatives[0].text, confidence: alternatives[0].confidence },
+                  'received eager end-of-turn event',
+                );
+
+                this.queue.put({
+                  type: stt.SpeechEventType.PREFLIGHT_TRANSCRIPT,
+                  // NOTE(review): spread form presumably satisfies a non-empty tuple type — confirm
+                  alternatives: [alternatives[0], ...alternatives.slice(1)],
+                });
+              }
+
+              break;
+            }
             case 'Metadata': {
               break;
             }