@@ -12,20 +12,64 @@ type ContentMessage = {
1212 content : string ;
1313} ;
1414
/**
 * Shape of an OpenAI/Anthropic-style message whose `content` is a list of
 * typed parts (the form those APIs use for media) instead of a single string.
 */
type ContentArrayMessage = {
  // Any other message-level fields are allowed and left untyped.
  [key: string]: unknown;
  content: Array<{
    // Every part carries a string `type`; its remaining fields are opaque here.
    [key: string]: unknown;
    type: string;
  }>;
};
25+
/**
 * Inline media content source, with a potentially very large base64
 * blob or `data:` URI.
 *
 * Every variant also accepts arbitrary extra keys (the `Record` part of the
 * intersection). The last two variants describe shapes that `isContentMedia`
 * already accepts: an `image_generation` part carrying a `result`, and a typed
 * part that wraps another media object under `source`.
 */
type ContentMedia = Record<string, unknown> &
  (
    | {
        media_type: string;
        data: string;
      }
    | {
        image_url: `data:${string}`;
      }
    | {
        type: 'blob' | 'base64';
        content: string;
      }
    | {
        b64_json: string;
      }
    | {
        uri: `data:${string}`;
      }
    | {
        type: 'image_generation';
        // The guard only checks that `result` is present, not what it holds.
        result: unknown;
      }
    | {
        type: string;
        source: ContentMedia;
      }
  );
50+
/**
 * Message shape used by the Google GenAI API: a `parts` list next to
 * arbitrary other fields. Each part is either a text part (a plain string or
 * an object with a `text` property) or a media part.
 */
type PartsMessage = {
  [key: string]: unknown;
  parts: (TextPart | MediaPart)[];
};
2359
/**
 * Text-bearing part of a Google GenAI message: either an object exposing the
 * text under `text`, or the raw string itself.
 */
type TextPart = { text: string } | string;
2864
/**
 * A part in a Google GenAI message that contains media.
 *
 * It has no `text` property; it consists of two string fields, `type` and
 * `content`.
 */
type MediaPart = Record<'type' | 'content', string>;
72+
2973/**
3074 * Calculate the UTF-8 byte length of a string.
3175 */
@@ -79,11 +123,12 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
79123 *
80124 * @returns The text content
81125 */
function getPartText(part: TextPart | MediaPart): string {
  // Object parts: only those exposing `text` contribute text; any other
  // object part (e.g. a media part) yields the empty string.
  if (typeof part !== 'string') {
    return 'text' in part ? part.text : '';
  }
  // A string part is its own text.
  return part;
}
88133
89134/**
@@ -93,7 +138,7 @@ function getPartText(part: TextPart): string {
93138 * @param text - New text content
94139 * @returns New part with updated text
95140 */
96- function withPartText ( part : TextPart , text : string ) : TextPart {
141+ function withPartText ( part : TextPart | MediaPart , text : string ) : TextPart {
97142 if ( typeof part === 'string' ) {
98143 return text ;
99144 }
@@ -112,6 +157,31 @@ function isContentMessage(message: unknown): message is ContentMessage {
112157 ) ;
113158}
114159
/**
 * Type guard for the OpenAI/Anthropic content-array message format: a
 * non-null object whose `content` property is an array.
 */
function isContentArrayMessage(message: unknown): message is ContentArrayMessage {
  if (typeof message !== 'object' || message === null) {
    return false;
  }
  return 'content' in message && Array.isArray(message.content);
}
166+
/**
 * Check if a content part is an OpenAI/Anthropic media source.
 *
 * A non-null object qualifies when any one of these holds, tested in order:
 *  - it is a typed wrapper whose `source` is itself media,
 *  - it has a string `media_type` and a `data` property,
 *  - its `image_url` is a string starting with `data:`,
 *  - its `type` is `'blob'` or `'base64'`,
 *  - it has a `b64_json` property,
 *  - its `type` is `'image_generation'` and it has a `result` property,
 *  - its `uri` is a string starting with `data:`.
 */
function isContentMedia(part: unknown): part is ContentMedia {
  if (typeof part !== 'object' || part === null) {
    return false;
  }

  if (isContentMediaSource(part)) {
    return true;
  }
  if ('media_type' in part && typeof part.media_type === 'string' && 'data' in part) {
    return true;
  }
  if ('image_url' in part && typeof part.image_url === 'string' && part.image_url.startsWith('data:')) {
    return true;
  }
  if ('type' in part && (part.type === 'blob' || part.type === 'base64')) {
    return true;
  }
  if ('b64_json' in part) {
    return true;
  }
  if ('type' in part && 'result' in part && part.type === 'image_generation') {
    return true;
  }
  return 'uri' in part && typeof part.uri === 'string' && part.uri.startsWith('data:');
}

/**
 * Check if a part is a typed wrapper around media: it must have a string
 * `type` and a `source` property that itself passes `isContentMedia`.
 */
function isContentMediaSource(part: NonNullable<unknown>): boolean {
  if (!('type' in part) || typeof part.type !== 'string') {
    return false;
  }
  return 'source' in part && isContentMedia(part.source);
}
184+
115185/**
116186 * Check if a message has the Google GenAI parts format.
117187 */
@@ -167,7 +237,7 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
167237 }
168238
169239 // Include parts until we run out of space
170- const includedParts : TextPart [ ] = [ ] ;
240+ const includedParts : ( TextPart | MediaPart ) [ ] = [ ] ;
171241
172242 for ( const part of parts ) {
173243 const text = getPartText ( part ) ;
@@ -190,7 +260,10 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
190260 }
191261 }
192262
263+ /* c8 ignore start
264+ * for type safety only, algorithm guarantees SOME text included */
193265 return includedParts . length > 0 ? [ { ...message , parts : includedParts } ] : [ ] ;
266+ /* c8 ignore stop */
194267}
195268
196269/**
@@ -205,9 +278,11 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
205278 * @returns Array containing the truncated message, or empty array if truncation fails
206279 */
207280function truncateSingleMessage ( message : unknown , maxBytes : number ) : unknown [ ] {
281+ /* c8 ignore start - unreachable */
208282 if ( ! message || typeof message !== 'object' ) {
209283 return [ ] ;
210284 }
285+ /* c8 ignore stop */
211286
212287 if ( isContentMessage ( message ) ) {
213288 return truncateContentMessage ( message , maxBytes ) ;
@@ -221,6 +296,59 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] {
221296 return [ ] ;
222297}
223298
/** Placeholder written in place of a stripped inline media payload. */
const REMOVED_STRING = '<removed>';

/** Property names whose string values are treated as inline media payloads. */
const MEDIA_FIELDS = ['image_url', 'data', 'content', 'b64_json', 'result', 'uri'] as const;

/**
 * Return a shallow copy of a media part with its inline payloads replaced by
 * `REMOVED_STRING`. The input object is not mutated.
 *
 * A nested `source` that is itself media is stripped recursively. After that,
 * each field named in `MEDIA_FIELDS` is replaced only when it holds a
 * non-empty string.
 *
 * @param part - Media part to strip
 * @returns A copy of `part` with inline payload strings replaced
 */
function stripInlineMediaFromSingleMessage(part: ContentMedia): ContentMedia {
  const strip = { ...part };
  if (isContentMedia(strip.source)) {
    strip.source = stripInlineMediaFromSingleMessage(strip.source);
  }
  for (const field of MEDIA_FIELDS) {
    const value = strip[field];
    // Only string payloads are replaced. A plain truthiness check would also
    // overwrite structured values (objects, arrays, numbers) that merely
    // share a field name such as `content` or `result`.
    if (typeof value === 'string' && value !== '') strip[field] = REMOVED_STRING;
  }
  return strip;
}
313+
/**
 * Produce a copy of a message list with inline media payloads stripped.
 *
 * The input is deliberately left untouched: the real API/client still needs
 * the original media, so affected messages are rebuilt as new objects.
 * Entries that are not objects are passed through unchanged.
 *
 * @param messages - Messages (or nested content/parts entries) to process
 * @returns A new array with media payloads stripped
 */
export function stripInlineMediaFromMessages(messages: unknown[]): unknown[] {
  const stripOne = (original: unknown): unknown => {
    if (!original || typeof original !== 'object') {
      return original;
    }

    let stripped: unknown = original;

    // Step 1: `content` is either an array of parts (recurse into it) or a
    // single media object (strip it directly).
    if (isContentArrayMessage(original)) {
      stripped = {
        ...original,
        content: stripInlineMediaFromMessages(original.content),
      };
    } else if ('content' in original && isContentMedia(original.content)) {
      stripped = {
        ...original,
        content: stripInlineMediaFromSingleMessage(original.content),
      };
    }

    // Step 2: messages in the `parts` format get each part processed too.
    if (isPartsMessage(stripped)) {
      stripped = {
        ...stripped,
        parts: stripInlineMediaFromMessages(stripped.parts),
      };
    }

    // Step 3: the entry itself may be a media object.
    return isContentMedia(stripped) ? stripInlineMediaFromSingleMessage(stripped) : stripped;
  };

  return messages.map(message => stripOne(message));
}
351+
224352/**
225353 * Truncate an array of messages to fit within a byte limit.
226354 *
@@ -246,6 +374,11 @@ export function truncateMessagesByBytes(messages: unknown[], maxBytes: number):
246374 return messages ;
247375 }
248376
377+ // strip inline media first. This will often get us below the threshold,
378+ // while preserving human-readable information about messages sent.
379+ // eslint-disable-next-line no-param-reassign
380+ messages = stripInlineMediaFromMessages ( messages ) ;
381+
249382 // Fast path: if all messages fit, return as-is
250383 const totalBytes = jsonBytes ( messages ) ;
251384 if ( totalBytes <= maxBytes ) {
0 commit comments