ryoppippi · GrayXu · Mar 8, 2026
diff --git a/apps/codex/src/_types.ts b/apps/codex/src/_types.ts
@@ -43,6 +43,10 @@ export type ModelPricing = {
 	inputCostPerMToken: number;
 	cachedInputCostPerMToken: number;
 	outputCostPerMToken: number;
+	tieredThresholdTokens?: number;
+	inputCostPerMTokenAboveThreshold?: number;
+	cachedInputCostPerMTokenAboveThreshold?: number;
+	outputCostPerMTokenAboveThreshold?: number;
 };
 
 export type PricingLookupResult = {

diff --git a/apps/codex/src/pricing.ts b/apps/codex/src/pricing.ts
@@ -11,6 +11,31 @@ const CODEX_MODEL_ALIASES_MAP = new Map<string, string>([
 	['gpt-5-codex', 'gpt-5'],
 	['gpt-5.3-codex', 'gpt-5.2-codex'],
 ]);
+const TIERED_PRICING_CONFIGS = [ // Consumed in array order by getTieredPricing, which stops at the first tier that has a rate.
+	{
+		thresholdTokens: 272_000, // surfaced to callers as tieredThresholdTokens
+		inputField: 'input_cost_per_token_above_272k_tokens',
+		cachedInputField: 'cache_read_input_token_cost_above_272k_tokens',
+		outputField: 'output_cost_per_token_above_272k_tokens',
+	},
+	{
+		thresholdTokens: 200_000,
+		inputField: 'input_cost_per_token_above_200k_tokens',
+		cachedInputField: 'cache_read_input_token_cost_above_200k_tokens',
+		outputField: 'output_cost_per_token_above_200k_tokens',
+	},
+	{
+		thresholdTokens: 128_000,
+		inputField: 'input_cost_per_token_above_128k_tokens',
+		cachedInputField: undefined, // this tier maps no cache-read field
+		outputField: 'output_cost_per_token_above_128k_tokens',
+	},
+] as const satisfies ReadonlyArray<{ // field names are checked against LiteLLMModelPricing keys at compile time
+	thresholdTokens: number;
+	inputField: keyof LiteLLMModelPricing;
+	cachedInputField?: keyof LiteLLMModelPricing;
+	outputField: keyof LiteLLMModelPricing;
+}>;
 const FREE_MODEL_PRICING = {
 	inputCostPerMToken: 0,
 	cachedInputCostPerMToken: 0,
@@ -39,6 +64,45 @@ function toPerMillion(value: number | undefined, fallback?: number): number {
 	return perToken * MILLION;
 }
 
+/**
+ * Extract above-threshold ("tiered") rates from a LiteLLM pricing entry.
+ *
+ * Walks TIERED_PRICING_CONFIGS in declaration order and uses the first tier for which the
+ * entry defines at least one rate; returns `{}` when no tier applies. A missing tiered
+ * rate falls back to the flat rate of the same token kind, so cached tokens are not
+ * repriced at the uncached input rate just because a tier is active.
+ */
+function getTieredPricing(
+	pricing: LiteLLMModelPricing,
+): Pick<
+	ModelPricing,
+	| 'tieredThresholdTokens'
+	| 'inputCostPerMTokenAboveThreshold'
+	| 'cachedInputCostPerMTokenAboveThreshold'
+	| 'outputCostPerMTokenAboveThreshold'
+> {
+	for (const config of TIERED_PRICING_CONFIGS) {
+		const tieredInput = pricing[config.inputField];
+		const tieredCachedInput =
+			config.cachedInputField == null ? undefined : pricing[config.cachedInputField];
+		const tieredOutput = pricing[config.outputField];
+		if (tieredInput == null && tieredCachedInput == null && tieredOutput == null) {
+			continue;
+		}
+		// Cached fallback order: flat cache-read rate, then tiered input, then flat input.
+		const cachedFallback =
+			pricing.cache_read_input_token_cost ?? tieredInput ?? pricing.input_cost_per_token;
+		return {
+			tieredThresholdTokens: config.thresholdTokens,
+			inputCostPerMTokenAboveThreshold: toPerMillion(tieredInput, pricing.input_cost_per_token),
+			cachedInputCostPerMTokenAboveThreshold: toPerMillion(tieredCachedInput, cachedFallback),
+			outputCostPerMTokenAboveThreshold: toPerMillion(tieredOutput, pricing.output_cost_per_token),
+		};
+	}
+
+	return {};
+}
+
 export type CodexPricingSourceOptions = {
 	offline?: boolean;
 	offlineLoader?: () => Promise<Record<string, LiteLLMModelPricing>>;
@@ -96,6 +160,7 @@ export class CodexPricingSource implements PricingSource, Disposable {
 				pricing.input_cost_per_token,
 			),
 			outputCostPerMToken: toPerMillion(pricing.output_cost_per_token),
+			...getTieredPricing(pricing),
 		};
 	}
 }
@@ -188,5 +253,27 @@ if (import.meta.vitest != null) {
 			expect(pricing.outputCostPerMToken).toBeCloseTo(15);
 			expect(pricing.cachedInputCostPerMToken).toBeCloseTo(0.19);
 		});
+
+		it('preserves tiered pricing metadata when LiteLLM exposes it', async () => {
+			// Fixture carries flat rates plus all three *_above_272k_tokens rates.
+			const tieredEntry = {
+				input_cost_per_token: 2.5e-6,
+				output_cost_per_token: 1.5e-5,
+				cache_read_input_token_cost: 2.5e-7,
+				input_cost_per_token_above_272k_tokens: 5e-6,
+				output_cost_per_token_above_272k_tokens: 2.25e-5,
+				cache_read_input_token_cost_above_272k_tokens: 5e-7,
+			};
+			using pricingSource = new CodexPricingSource({
+				offline: true,
+				offlineLoader: async () => ({ 'gpt-5.4': tieredEntry }),
+			});
+
+			const resolved = await pricingSource.getPricing('gpt-5.4');
+			expect(resolved.tieredThresholdTokens).toBe(272_000);
+			expect(resolved.inputCostPerMTokenAboveThreshold).toBeCloseTo(5);
+			expect(resolved.outputCostPerMTokenAboveThreshold).toBeCloseTo(22.5);
+			expect(resolved.cachedInputCostPerMTokenAboveThreshold).toBeCloseTo(0.5);
+		});
 	});
 }
diff --git a/apps/codex/src/token-utils.ts b/apps/codex/src/token-utils.ts
@@ -25,6 +25,29 @@ function nonCachedInputTokens(usage: TokenUsageDelta): number {
 	return nonCached > 0 ? nonCached : 0;
 }
 
+/**
+ * Cost of `totalTokens` under two-band pricing: the first `thresholdTokens` tokens are
+ * billed at the base rate and the remainder at the tiered rate. When either optional
+ * argument is missing, or the count does not exceed the threshold, the base rate applies
+ * to every token. Non-positive counts cost 0. Rates are per million tokens.
+ */
+function calculateTieredCost(
+	totalTokens: number,
+	basePricePerMToken: number,
+	tieredPricePerMToken?: number,
+	thresholdTokens?: number,
+): number {
+	if (totalTokens <= 0) {
+		return 0;
+	}
+	if (thresholdTokens == null || tieredPricePerMToken == null || totalTokens <= thresholdTokens) {
+		return (totalTokens / MILLION) * basePricePerMToken;
+	}
+	// The guard above leaves exactly `thresholdTokens` tokens in the base band.
+	const baseBandCost = (thresholdTokens / MILLION) * basePricePerMToken;
+	return baseBandCost + ((totalTokens - thresholdTokens) / MILLION) * tieredPricePerMToken;
+}
+
 /**
  * Calculate the cost in USD for token usage based on model pricing
  *
@@ -45,11 +68,79 @@ export function calculateCostUSD(usage: TokenUsageDelta, pricing: ModelPricing):
 		usage.cachedInputTokens > usage.inputTokens ? usage.inputTokens : usage.cachedInputTokens;
 	const outputTokens = usage.outputTokens;
 
-	const inputCost = (nonCachedInput / MILLION) * pricing.inputCostPerMToken;
-	const cachedCost = (cachedInput / MILLION) * pricing.cachedInputCostPerMToken;
-	const outputCost = (outputTokens / MILLION) * pricing.outputCostPerMToken;
+	const inputCost = calculateTieredCost(
+		nonCachedInput,
+		pricing.inputCostPerMToken,
+		pricing.inputCostPerMTokenAboveThreshold,
+		pricing.tieredThresholdTokens,
+	);
+	const cachedCost = calculateTieredCost(
+		cachedInput,
+		pricing.cachedInputCostPerMToken,
+		pricing.cachedInputCostPerMTokenAboveThreshold,
+		pricing.tieredThresholdTokens,
+	);
+	const outputCost = calculateTieredCost(
+		outputTokens,
+		pricing.outputCostPerMToken,
+		pricing.outputCostPerMTokenAboveThreshold,
+		pricing.tieredThresholdTokens,
+	);
 
 	return inputCost + cachedCost + outputCost;
 }
 
 export { formatCurrency, formatTokens };
+
+if (import.meta.vitest != null) {
+	describe('calculateCostUSD', () => {
+		// Flat per-million-token rates shared by both scenarios.
+		const flatRates = {
+			inputCostPerMToken: 2.5,
+			cachedInputCostPerMToken: 0.25,
+			outputCostPerMToken: 15,
+		};
+
+		it('uses flat pricing when no tiered rates are provided', () => {
+			const usage = {
+				inputTokens: 1_000,
+				cachedInputTokens: 100,
+				outputTokens: 500,
+				reasoningOutputTokens: 0,
+				totalTokens: 1_500,
+			};
+
+			// 900 uncached input, 100 cached input and 500 output tokens, all at flat rates.
+			const flatTotal = (900 / MILLION) * 2.5 + (100 / MILLION) * 0.25 + (500 / MILLION) * 15;
+			expect(calculateCostUSD(usage, flatRates)).toBeCloseTo(flatTotal, 10);
+		});
+
+		it('uses tiered pricing above the configured threshold', () => {
+			const usage = {
+				inputTokens: 320_000,
+				cachedInputTokens: 40_000,
+				outputTokens: 300_000,
+				reasoningOutputTokens: 0,
+				totalTokens: 620_000,
+			};
+
+			const tieredRates = {
+				...flatRates,
+				tieredThresholdTokens: 272_000,
+				inputCostPerMTokenAboveThreshold: 5,
+				cachedInputCostPerMTokenAboveThreshold: 0.5,
+				outputCostPerMTokenAboveThreshold: 22.5,
+			};
+
+			// Expected terms, in order: uncached input below/above 272k, cached input
+			// (under the threshold), then output below/above 272k.
+			const tieredTotal =
+				(272_000 / MILLION) * 2.5 +
+				(8_000 / MILLION) * 5 +
+				(40_000 / MILLION) * 0.25 +
+				(272_000 / MILLION) * 15 +
+				(28_000 / MILLION) * 22.5;
+			expect(calculateCostUSD(usage, tieredRates)).toBeCloseTo(tieredTotal, 10);
+		});
+	});
+}