diff --git a/apps/ccusage/src/data-loader.ts b/apps/ccusage/src/data-loader.ts index 8e22aae2..3c41fbbb 100644 --- a/apps/ccusage/src/data-loader.ts +++ b/apps/ccusage/src/data-loader.ts @@ -486,25 +486,6 @@ function filterByProject( }); } -/** - * Checks if an entry is a duplicate based on hash - */ -function isDuplicateEntry(uniqueHash: string | null, processedHashes: Set): boolean { - if (uniqueHash == null) { - return false; - } - return processedHashes.has(uniqueHash); -} - -/** - * Marks an entry as processed - */ -function markAsProcessed(uniqueHash: string | null, processedHashes: Set): void { - if (uniqueHash != null) { - processedHashes.add(uniqueHash); - } -} - /** * Extracts unique models from entries, excluding synthetic model */ @@ -774,17 +755,17 @@ export async function loadDailyUsageData(options?: LoadOptions): Promise(); - - // Collect all valid data entries first - const allEntries: { + // Track entries by hash to keep the last (most complete) entry for each unique message+request + // This is important for streaming responses where output_tokens accumulate across multiple entries + type EntryType = { data: UsageData; date: string; cost: number; model: string | undefined; project: string; - }[] = []; + }; + const entriesByHash = new Map(); + const entriesWithoutHash: EntryType[] = []; for (const file of sortedFiles) { // Extract project name from file path once per file @@ -799,16 +780,6 @@ export async function loadDailyUsageData(options?: LoadOptions): Promise(); - - // Collect all valid data entries with session info first - const allEntries: Array<{ + // Track entries by hash to keep the last (most complete) entry for each unique message+request + // This is important for streaming responses where output_tokens accumulate across multiple entries + type EntryType = { data: UsageData; sessionKey: string; sessionId: string; @@ -943,7 +924,9 @@ export async function loadSessionData(options?: LoadOptions): Promise = []; + }; + const entriesByHash = new Map(); + const entriesWithoutHash: EntryType[] = []; for (const { file, baseDir } of sortedFilesWithBase) { // Extract session info from file path using its specific base directory @@ -965,21 +948,11 @@ export async function loadSessionData(options?: LoadOptions): Promise entry.sessionKey); @@ -1376,11 +1361,10 @@ export async function loadSessionBlockData(options?: LoadOptions): Promise(); - - // Collect all valid data entries first - const allEntries: LoadedUsageEntry[] = []; + // Track entries by hash to keep the last (most complete) entry for each unique message+request + // This is important for streaming responses where output_tokens accumulate across multiple entries + const entriesByHash = new Map(); + const entriesWithoutHash: LoadedUsageEntry[] = []; for (const file of sortedFiles) { await processJSONLFileByLine(file, async (line) => { @@ -1392,23 +1376,13 @@ export async function loadSessionBlockData(options?: LoadOptions): Promise { + it('should process files in chronological order and keep last entry', async () => { await using fixture = await createFixture({ projects: { 'newer.jsonl': JSON.stringify({ @@ -4454,11 +4441,12 @@ if (import.meta.vitest != null) { mode: 'display', }); - // Should keep the older entry (100/50 tokens) not the newer one (200/100) + // Should keep the LAST entry (200/100 tokens) for streaming response accuracy + // Files are processed in chronological order, last entry wins expect(data).toHaveLength(1); - expect(data[0]?.date).toBe('2025-01-10'); - expect(data[0]?.inputTokens).toBe(100); - expect(data[0]?.outputTokens).toBe(50); + expect(data[0]?.date).toBe('2025-01-15'); + expect(data[0]?.inputTokens).toBe(200); + expect(data[0]?.outputTokens).toBe(100); }); }); @@ -4504,19 +4492,19 @@ if (import.meta.vitest != null) { mode: 'display', }); - // Session 1 should have the entry - const session1 = sessions.find((s) => s.sessionId === 'session1'); - expect(session1).toBeDefined(); - expect(session1?.inputTokens).toBe(100); - expect(session1?.outputTokens).toBe(50); - - // Session 2 should either not exist or have 0 tokens (duplicate was skipped) + // Session 2 should have the entry (LAST one wins for streaming accuracy) const session2 = sessions.find((s) => s.sessionId === 'session2'); - if (session2 != null) { - expect(session2.inputTokens).toBe(0); - expect(session2.outputTokens).toBe(0); + expect(session2).toBeDefined(); + expect(session2?.inputTokens).toBe(100); + expect(session2?.outputTokens).toBe(50); + + // Session 1 should either not exist or have 0 tokens (superseded by later entry) + const session1 = sessions.find((s) => s.sessionId === 'session1'); + if (session1 != null) { + expect(session1.inputTokens).toBe(0); + expect(session1.outputTokens).toBe(0); } else { - // It's also valid for session2 to not be included if it has no entries + // It's also valid for session1 to not be included if it has no entries expect(sessions.length).toBe(1); } });