diff --git a/apps/codex/src/_types.ts b/apps/codex/src/_types.ts index 3540e218..681a8483 100644 --- a/apps/codex/src/_types.ts +++ b/apps/codex/src/_types.ts @@ -43,6 +43,10 @@ export type ModelPricing = { inputCostPerMToken: number; cachedInputCostPerMToken: number; outputCostPerMToken: number; + tieredThresholdTokens?: number; + inputCostPerMTokenAboveThreshold?: number; + cachedInputCostPerMTokenAboveThreshold?: number; + outputCostPerMTokenAboveThreshold?: number; }; export type PricingLookupResult = { diff --git a/apps/codex/src/pricing.ts b/apps/codex/src/pricing.ts index f4b1c6dd..12c80acb 100644 --- a/apps/codex/src/pricing.ts +++ b/apps/codex/src/pricing.ts @@ -11,6 +11,31 @@ const CODEX_MODEL_ALIASES_MAP = new Map([ ['gpt-5-codex', 'gpt-5'], ['gpt-5.3-codex', 'gpt-5.2-codex'], ]); +const TIERED_PRICING_CONFIGS = [ + { + thresholdTokens: 272_000, + inputField: 'input_cost_per_token_above_272k_tokens', + cachedInputField: 'cache_read_input_token_cost_above_272k_tokens', + outputField: 'output_cost_per_token_above_272k_tokens', + }, + { + thresholdTokens: 200_000, + inputField: 'input_cost_per_token_above_200k_tokens', + cachedInputField: 'cache_read_input_token_cost_above_200k_tokens', + outputField: 'output_cost_per_token_above_200k_tokens', + }, + { + thresholdTokens: 128_000, + inputField: 'input_cost_per_token_above_128k_tokens', + cachedInputField: undefined, + outputField: 'output_cost_per_token_above_128k_tokens', + }, +] as const satisfies ReadonlyArray<{ + thresholdTokens: number; + inputField: keyof LiteLLMModelPricing; + cachedInputField?: keyof LiteLLMModelPricing; + outputField: keyof LiteLLMModelPricing; +}>; const FREE_MODEL_PRICING = { inputCostPerMToken: 0, cachedInputCostPerMToken: 0, @@ -39,6 +64,45 @@ function toPerMillion(value: number | undefined, fallback?: number): number { return perToken * MILLION; } +function getTieredPricing( + pricing: LiteLLMModelPricing, +): Pick< + ModelPricing, + | 'tieredThresholdTokens' + | 
'inputCostPerMTokenAboveThreshold' + | 'cachedInputCostPerMTokenAboveThreshold' + | 'outputCostPerMTokenAboveThreshold' +> { + for (const config of TIERED_PRICING_CONFIGS) { + const tieredInputPrice = pricing[config.inputField]; + const tieredCachedInputPrice = + config.cachedInputField == null ? undefined : pricing[config.cachedInputField]; + const tieredOutputPrice = pricing[config.outputField]; + + if (tieredInputPrice == null && tieredCachedInputPrice == null && tieredOutputPrice == null) { + continue; + } + + return { + tieredThresholdTokens: config.thresholdTokens, + inputCostPerMTokenAboveThreshold: toPerMillion( + tieredInputPrice, + pricing.input_cost_per_token, + ), + cachedInputCostPerMTokenAboveThreshold: toPerMillion( + tieredCachedInputPrice, + tieredInputPrice ?? pricing.input_cost_per_token, + ), + outputCostPerMTokenAboveThreshold: toPerMillion( + tieredOutputPrice, + pricing.output_cost_per_token, + ), + }; + } + + return {}; +} + export type CodexPricingSourceOptions = { offline?: boolean; offlineLoader?: () => Promise<Record<string, LiteLLMModelPricing>>; @@ -96,6 +160,7 @@ export class CodexPricingSource implements PricingSource, Disposable { pricing.input_cost_per_token, ), outputCostPerMToken: toPerMillion(pricing.output_cost_per_token), + ...getTieredPricing(pricing), }; } } @@ -188,5 +253,27 @@ if (import.meta.vitest != null) { expect(pricing.outputCostPerMToken).toBeCloseTo(15); expect(pricing.cachedInputCostPerMToken).toBeCloseTo(0.19); }); + + it('preserves tiered pricing metadata when LiteLLM exposes it', async () => { + using source = new CodexPricingSource({ + offline: true, + offlineLoader: async () => ({ + 'gpt-5.4': { + input_cost_per_token: 2.5e-6, + output_cost_per_token: 1.5e-5, + cache_read_input_token_cost: 2.5e-7, + input_cost_per_token_above_272k_tokens: 5e-6, + output_cost_per_token_above_272k_tokens: 2.25e-5, + cache_read_input_token_cost_above_272k_tokens: 5e-7, + }, + }), + }); + + const pricing = await source.getPricing('gpt-5.4'); + 
expect(pricing.tieredThresholdTokens).toBe(272_000); + expect(pricing.inputCostPerMTokenAboveThreshold).toBeCloseTo(5); + expect(pricing.outputCostPerMTokenAboveThreshold).toBeCloseTo(22.5); + expect(pricing.cachedInputCostPerMTokenAboveThreshold).toBeCloseTo(0.5); + }); }); } diff --git a/apps/codex/src/token-utils.ts b/apps/codex/src/token-utils.ts index bcbc3982..7e7ace58 100644 --- a/apps/codex/src/token-utils.ts +++ b/apps/codex/src/token-utils.ts @@ -25,6 +25,29 @@ function nonCachedInputTokens(usage: TokenUsageDelta): number { return nonCached > 0 ? nonCached : 0; } +function calculateTieredCost( + totalTokens: number, + basePricePerMToken: number, + tieredPricePerMToken?: number, + thresholdTokens?: number, +): number { + if (totalTokens <= 0) { + return 0; + } + + if (thresholdTokens == null || tieredPricePerMToken == null || totalTokens <= thresholdTokens) { + return (totalTokens / MILLION) * basePricePerMToken; + } + + const tokensBelowThreshold = Math.min(totalTokens, thresholdTokens); + const tokensAboveThreshold = Math.max(0, totalTokens - thresholdTokens); + + return ( + (tokensBelowThreshold / MILLION) * basePricePerMToken + + (tokensAboveThreshold / MILLION) * tieredPricePerMToken + ); +} + /** * Calculate the cost in USD for token usage based on model pricing * @@ -45,11 +68,79 @@ export function calculateCostUSD(usage: TokenUsageDelta, pricing: ModelPricing): usage.cachedInputTokens > usage.inputTokens ? 
usage.inputTokens : usage.cachedInputTokens; const outputTokens = usage.outputTokens; - const inputCost = (nonCachedInput / MILLION) * pricing.inputCostPerMToken; - const cachedCost = (cachedInput / MILLION) * pricing.cachedInputCostPerMToken; - const outputCost = (outputTokens / MILLION) * pricing.outputCostPerMToken; + const inputCost = calculateTieredCost( + nonCachedInput, + pricing.inputCostPerMToken, + pricing.inputCostPerMTokenAboveThreshold, + pricing.tieredThresholdTokens, + ); + const cachedCost = calculateTieredCost( + cachedInput, + pricing.cachedInputCostPerMToken, + pricing.cachedInputCostPerMTokenAboveThreshold, + pricing.tieredThresholdTokens, + ); + const outputCost = calculateTieredCost( + outputTokens, + pricing.outputCostPerMToken, + pricing.outputCostPerMTokenAboveThreshold, + pricing.tieredThresholdTokens, + ); return inputCost + cachedCost + outputCost; } export { formatCurrency, formatTokens }; + +if (import.meta.vitest != null) { + describe('calculateCostUSD', () => { + it('uses flat pricing when no tiered rates are provided', () => { + const cost = calculateCostUSD( + { + inputTokens: 1_000, + cachedInputTokens: 100, + outputTokens: 500, + reasoningOutputTokens: 0, + totalTokens: 1_500, + }, + { + inputCostPerMToken: 2.5, + cachedInputCostPerMToken: 0.25, + outputCostPerMToken: 15, + }, + ); + + const expected = (900 / MILLION) * 2.5 + (100 / MILLION) * 0.25 + (500 / MILLION) * 15; + expect(cost).toBeCloseTo(expected, 10); + }); + + it('uses tiered pricing above the configured threshold', () => { + const cost = calculateCostUSD( + { + inputTokens: 320_000, + cachedInputTokens: 40_000, + outputTokens: 300_000, + reasoningOutputTokens: 0, + totalTokens: 620_000, + }, + { + inputCostPerMToken: 2.5, + cachedInputCostPerMToken: 0.25, + outputCostPerMToken: 15, + tieredThresholdTokens: 272_000, + inputCostPerMTokenAboveThreshold: 5, + cachedInputCostPerMTokenAboveThreshold: 0.5, + outputCostPerMTokenAboveThreshold: 22.5, + }, + ); + + const 
expected = + (272_000 / MILLION) * 2.5 + + (8_000 / MILLION) * 5 + + (40_000 / MILLION) * 0.25 + + (272_000 / MILLION) * 15 + + (28_000 / MILLION) * 22.5; + expect(cost).toBeCloseTo(expected, 10); + }); + }); +} diff --git a/packages/internal/src/pricing.ts b/packages/internal/src/pricing.ts index a76facf4..e0a79069 100644 --- a/packages/internal/src/pricing.ts +++ b/packages/internal/src/pricing.ts @@ -4,28 +4,49 @@ import * as v from 'valibot'; export const LITELLM_PRICING_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'; -/** - * Default token threshold for tiered pricing in 1M context window models. - * LiteLLM's pricing schema hard-codes this threshold in field names - * (e.g., `input_cost_per_token_above_200k_tokens`). - * The threshold parameter in calculateTieredCost allows flexibility for - * future models that may use different thresholds. - */ -const DEFAULT_TIERED_THRESHOLD = 200_000; +const TIERED_PRICING_CONFIGS = [ + { + threshold: 272_000, + inputField: 'input_cost_per_token_above_272k_tokens', + outputField: 'output_cost_per_token_above_272k_tokens', + cacheCreationField: 'cache_creation_input_token_cost_above_272k_tokens', + cacheReadField: 'cache_read_input_token_cost_above_272k_tokens', + }, + { + threshold: 200_000, + inputField: 'input_cost_per_token_above_200k_tokens', + outputField: 'output_cost_per_token_above_200k_tokens', + cacheCreationField: 'cache_creation_input_token_cost_above_200k_tokens', + cacheReadField: 'cache_read_input_token_cost_above_200k_tokens', + }, + { + threshold: 128_000, + inputField: 'input_cost_per_token_above_128k_tokens', + outputField: 'output_cost_per_token_above_128k_tokens', + cacheCreationField: undefined, + cacheReadField: undefined, + }, +] as const satisfies ReadonlyArray<{ + threshold: number; + inputField: keyof LiteLLMModelPricing; + outputField: keyof LiteLLMModelPricing; + cacheCreationField?: keyof LiteLLMModelPricing; + cacheReadField?: keyof 
LiteLLMModelPricing; +}>; /** * LiteLLM Model Pricing Schema * * ⚠️ TIERED PRICING NOTE: * Different models use different token thresholds for tiered pricing: - * - Claude/Anthropic: 200k tokens (implemented in calculateTieredCost) - * - Gemini: 128k tokens (schema fields only, NOT implemented in calculations) - * - GPT/OpenAI: No tiered pricing (flat rate) + * - OpenAI GPT-5.4: 272k tokens + * - Claude/Anthropic: 200k tokens + * - Gemini: 128k tokens * * When adding support for new models: * 1. Check if model has tiered pricing in LiteLLM data * 2. Verify the threshold value - * 3. Update calculateTieredCost logic if threshold differs from 200k + * 3. Add the corresponding threshold config when LiteLLM introduces a new tier field * 4. Add tests for tiered pricing boundaries */ export const liteLLMModelPricingSchema = v.object({ @@ -36,12 +57,17 @@ export const liteLLMModelPricingSchema = v.object({ max_tokens: v.optional(v.number()), max_input_tokens: v.optional(v.number()), max_output_tokens: v.optional(v.number()), + // OpenAI GPT-5.4: 1M context window pricing (272k threshold) + input_cost_per_token_above_272k_tokens: v.optional(v.number()), + output_cost_per_token_above_272k_tokens: v.optional(v.number()), + cache_creation_input_token_cost_above_272k_tokens: v.optional(v.number()), + cache_read_input_token_cost_above_272k_tokens: v.optional(v.number()), // Claude/Anthropic: 1M context window pricing (200k threshold) input_cost_per_token_above_200k_tokens: v.optional(v.number()), output_cost_per_token_above_200k_tokens: v.optional(v.number()), cache_creation_input_token_cost_above_200k_tokens: v.optional(v.number()), cache_read_input_token_cost_above_200k_tokens: v.optional(v.number()), - // Gemini: Tiered pricing (128k threshold) - NOT implemented in calculations + // Gemini: Tiered pricing (128k threshold) input_cost_per_token_above_128k_tokens: v.optional(v.number()), output_cost_per_token_above_128k_tokens: v.optional(v.number()), // Provider-specific 
pricing multipliers (e.g., fast mode, regional pricing) @@ -273,31 +299,36 @@ export class LiteLLMPricingFetcher implements Disposable { }, pricing: LiteLLMModelPricing, ): number { + const getTieredPricingConfig = (): (typeof TIERED_PRICING_CONFIGS)[number] | null => { + for (const config of TIERED_PRICING_CONFIGS) { + if ( + pricing[config.inputField] != null || + pricing[config.outputField] != null || + (config.cacheCreationField != null && pricing[config.cacheCreationField] != null) || + (config.cacheReadField != null && pricing[config.cacheReadField] != null) + ) { + return config; + } + } + + return null; + }; + /** - * Calculate cost with tiered pricing for 1M context window models - * - * @param totalTokens - Total number of tokens to calculate cost for - * @param basePrice - Price per token for tokens up to the threshold - * @param tieredPrice - Price per token for tokens above the threshold - * @param threshold - Token threshold for tiered pricing (default 200k) - * @returns Total cost applying tiered pricing when applicable - * - * @example - * // 300k tokens with base price $3/M and tiered price $6/M - * calculateTieredCost(300_000, 3e-6, 6e-6) - * // Returns: (200_000 * 3e-6) + (100_000 * 6e-6) = $1.2 + * Calculate cost with tiered pricing for models that expose a second rate + * above a token threshold via LiteLLM's pricing schema. 
*/ const calculateTieredCost = ( totalTokens: number | undefined, basePrice: number | undefined, tieredPrice: number | undefined, - threshold: number = DEFAULT_TIERED_THRESHOLD, + threshold?: number, ): number => { if (totalTokens == null || totalTokens <= 0) { return 0; } - if (totalTokens > threshold && tieredPrice != null) { + if (threshold != null && totalTokens > threshold && tieredPrice != null) { const tokensBelowThreshold = Math.min(totalTokens, threshold); const tokensAboveThreshold = Math.max(0, totalTokens - threshold); @@ -315,28 +346,39 @@ export class LiteLLMPricingFetcher implements Disposable { return 0; }; + const tieredPricingConfig = getTieredPricingConfig(); + const threshold = tieredPricingConfig?.threshold; + const inputCost = calculateTieredCost( tokens.input_tokens, pricing.input_cost_per_token, - pricing.input_cost_per_token_above_200k_tokens, + tieredPricingConfig == null ? undefined : pricing[tieredPricingConfig.inputField], + threshold, ); const outputCost = calculateTieredCost( tokens.output_tokens, pricing.output_cost_per_token, - pricing.output_cost_per_token_above_200k_tokens, + tieredPricingConfig == null ? undefined : pricing[tieredPricingConfig.outputField], + threshold, ); const cacheCreationCost = calculateTieredCost( tokens.cache_creation_input_tokens, pricing.cache_creation_input_token_cost, - pricing.cache_creation_input_token_cost_above_200k_tokens, + tieredPricingConfig?.cacheCreationField == null + ? undefined + : pricing[tieredPricingConfig.cacheCreationField], + threshold, ); const cacheReadCost = calculateTieredCost( tokens.cache_read_input_tokens, pricing.cache_read_input_token_cost, - pricing.cache_read_input_token_cost_above_200k_tokens, + tieredPricingConfig?.cacheReadField == null + ? 
undefined + : pricing[tieredPricingConfig.cacheReadField], + threshold, ); return inputCost + outputCost + cacheCreationCost + cacheReadCost; @@ -570,6 +612,42 @@ if (import.meta.vitest != null) { expect(costBelow).toBe(0); }); + it('calculates tiered pricing for 272k threshold models', async () => { + using fetcher = new LiteLLMPricingFetcher({ + offline: true, + offlineLoader: async () => ({ + 'gpt-5.4': { + input_cost_per_token: 2.5e-6, + output_cost_per_token: 1.5e-5, + cache_read_input_token_cost: 2.5e-7, + input_cost_per_token_above_272k_tokens: 5e-6, + output_cost_per_token_above_272k_tokens: 2.25e-5, + cache_read_input_token_cost_above_272k_tokens: 5e-7, + }, + }), + }); + + const cost = await Result.unwrap( + fetcher.calculateCostFromTokens( + { + input_tokens: 300_000, + output_tokens: 280_000, + cache_read_input_tokens: 290_000, + }, + 'gpt-5.4', + ), + ); + + const expectedCost = + 272_000 * 2.5e-6 + + 28_000 * 5e-6 + + 272_000 * 1.5e-5 + + 8_000 * 2.25e-5 + + 272_000 * 2.5e-7 + + 18_000 * 5e-7; + expect(cost).toBeCloseTo(expectedCost); + }); + it('applies fast speed multiplier from provider_specific_entry', async () => { using fetcher = new LiteLLMPricingFetcher({ offline: true,