Skip to content

Commit d1695d6

Browse files
authored
Add OpenAI video generation and content retrieval support (#15745)
* Add openai videos generation and retrieval support * add retrieval endpoint * Add docs * Add imports * remove orjson * remove double import * fix openai videos format * remove mock code * remove not required comments * Add tests * Add tests * Add other video endpoints * Fix cost calculation and transformation * Fixed mypy tests * remove not used imports * fix typed dict for list * fix mypy errors
1 parent 1a87c02 commit d1695d6

File tree

20 files changed

+3016
-10
lines changed

20 files changed

+3016
-10
lines changed

docs/my-website/docs/providers/openai.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,4 +836,10 @@ response = completion(
836836
model="gpt-5-pro",
837837
messages=[{"role": "user", "content": "Solve this complex reasoning problem..."}]
838838
)
839-
```
839+
```
840+
841+
## Video Generation
842+
843+
LiteLLM supports OpenAI's video generation models including Sora.
844+
845+
For detailed documentation on video generation, see [OpenAI Video Generation →](./openai/video_generation.md)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import Tabs from '@theme/Tabs';
2+
import TabItem from '@theme/TabItem';
3+
4+
# OpenAI Video Generation
5+
6+
LiteLLM supports OpenAI's video generation models including Sora.
7+
8+
## Quick Start
9+
10+
### Required API Keys
11+
12+
```python
13+
import os
14+
os.environ["OPENAI_API_KEY"] = "your-api-key"
15+
```
16+
17+
### Basic Usage
18+
19+
```python
20+
from litellm import video_generation, video_retrieval
21+
import os
22+
23+
os.environ["OPENAI_API_KEY"] = "your-api-key"
24+
25+
# Generate a video
26+
response = video_generation(
27+
prompt="A cat playing with a ball of yarn in a sunny garden",
28+
model="sora-2",
29+
seconds="8",
30+
size="720x1280"
31+
)
32+
33+
print(f"Video ID: {response.id}")
34+
print(f"Status: {response.status}")
35+
36+
# Download video content when ready
37+
video_bytes = video_retrieval(
38+
video_id=response.id,
39+
model="sora-2"
40+
)
41+
42+
# Save to file
43+
with open("generated_video.mp4", "wb") as f:
44+
f.write(video_bytes)
45+
```
46+
47+
## Supported Models
48+
49+
| Model Name | Description | Max Duration | Supported Sizes |
50+
|------------|-------------|--------------|-----------------|
51+
| sora-2 | OpenAI's latest video generation model | 8 seconds | 720x1280, 1280x720 |
52+
53+
## Video Generation Parameters
54+
55+
- `prompt` (required): Text description of the desired video
56+
- `model` (optional): Model to use, defaults to "sora-2"
57+
- `seconds` (optional): Video duration in seconds (e.g., "4", "8"; sora-2 supports up to 8 seconds)
58+
- `size` (optional): Video dimensions (e.g., "720x1280", "1280x720")
59+
- `input_reference` (optional): Reference image for video editing
60+
- `user` (optional): User identifier for tracking
61+
62+
## Video Content Retrieval
63+
64+
```python
65+
# Download video content
66+
video_bytes = video_retrieval(
67+
video_id="video_1234567890",
68+
model="sora-2"
69+
)
70+
71+
# Save to file
72+
with open("video.mp4", "wb") as f:
73+
f.write(video_bytes)
74+
```
75+
76+
## Complete Workflow
77+
78+
```python
79+
import litellm
80+
import time
81+
82+
def generate_and_download_video(prompt):
83+
# Step 1: Generate video
84+
response = litellm.video_generation(
85+
prompt=prompt,
86+
model="sora-2",
87+
seconds="8",
88+
size="720x1280"
89+
)
90+
91+
video_id = response.id
92+
print(f"Video ID: {video_id}")
93+
94+
# Step 2: Wait for processing (in practice, poll status)
95+
time.sleep(30)
96+
97+
# Step 3: Download video
98+
video_bytes = litellm.video_retrieval(
99+
video_id=video_id,
100+
model="sora-2"
101+
)
102+
103+
# Step 4: Save to file
104+
with open(f"video_{video_id}.mp4", "wb") as f:
105+
f.write(video_bytes)
106+
107+
return f"video_{video_id}.mp4"
108+
109+
# Usage
110+
video_file = generate_and_download_video(
111+
"A cat playing with a ball of yarn in a sunny garden"
112+
)
113+
```
114+
115+
## Video Editing with Reference Images
116+
117+
```python
118+
# Video editing with reference image
119+
response = litellm.video_generation(
120+
prompt="Make the cat jump higher",
121+
input_reference="path/to/image.jpg", # Reference image
122+
model="sora-2",
123+
seconds="8"
124+
)
125+
126+
print(f"Video ID: {response.id}")
127+
```
128+
129+
## Error Handling
130+
131+
```python
132+
from litellm.exceptions import BadRequestError, AuthenticationError
133+
134+
try:
135+
response = video_generation(
136+
prompt="A cat playing with a ball of yarn",
137+
model="sora-2"
138+
)
139+
except AuthenticationError as e:
140+
print(f"Authentication failed: {e}")
141+
except BadRequestError as e:
142+
print(f"Bad request: {e}")
143+
```

litellm/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
# Register async client cleanup to prevent resource leaks
105105
register_async_client_cleanup()
106106
####################################################
107-
if set_verbose == True:
107+
if set_verbose:
108108
_turn_on_debug()
109109
####################################################
110110
### Callbacks /Logging / Success / Failure Handlers #####
@@ -980,6 +980,9 @@ def add_known_models():
980980
####### IMAGE GENERATION MODELS ###################
981981
openai_image_generation_models = ["dall-e-2", "dall-e-3"]
982982

983+
####### VIDEO GENERATION MODELS ###################
984+
openai_video_generation_models = ["sora-2"]
985+
983986
from .timeout import timeout
984987
from .cost_calculator import completion_cost
985988
from litellm.litellm_core_utils.litellm_logging import Logging, modify_integration
@@ -1209,7 +1212,6 @@ def add_known_models():
12091212
OpenAIOSeriesConfig,
12101213
)
12111214

1212-
from .llms.snowflake.chat.transformation import SnowflakeConfig
12131215
from .llms.gradient_ai.chat.transformation import GradientAIConfig
12141216

12151217
openaiOSeriesConfig = OpenAIOSeriesConfig()
@@ -1245,7 +1247,6 @@ def add_known_models():
12451247
from .llms.baseten.chat import BasetenConfig
12461248
from .llms.sambanova.chat import SambanovaConfig
12471249
from .llms.sambanova.embedding.transformation import SambaNovaEmbeddingConfig
1248-
from .llms.ai21.chat.transformation import AI21ChatConfig
12491250
from .llms.fireworks_ai.chat.transformation import FireworksAIConfig
12501251
from .llms.fireworks_ai.completion.transformation import FireworksAITextCompletionConfig
12511252
from .llms.fireworks_ai.audio_transcription.transformation import (
@@ -1332,6 +1333,7 @@ def add_known_models():
13321333
from .assistants.main import *
13331334
from .batches.main import *
13341335
from .images.main import *
1336+
from .videos.main import *
13351337
from .batch_completion.main import * # type: ignore
13361338
from .rerank_api.main import *
13371339
from .llms.anthropic.experimental_pass_through.messages.handler import *

litellm/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@
265265
"high": 10,
266266
}
267267
DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2"
268+
DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2"
268269

269270
LITELLM_CHAT_PROVIDERS = [
270271
"openai",

litellm/cost_calculator.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,29 @@ def completion_cost( # noqa: PLR0915
877877
size=size,
878878
optional_params=optional_params,
879879
)
880+
elif (
881+
call_type == CallTypes.create_video.value
882+
or call_type == CallTypes.acreate_video.value
883+
):
884+
### VIDEO GENERATION COST CALCULATION ###
885+
if completion_response is not None and hasattr(completion_response, 'usage'):
886+
usage_obj = completion_response.usage
887+
duration_seconds = usage_obj.get('duration_seconds')
888+
889+
if duration_seconds is not None:
890+
# Calculate cost based on video duration using video-specific cost calculation
891+
from litellm.llms.openai.cost_calculation import video_generation_cost
892+
return video_generation_cost(
893+
model=model,
894+
duration_seconds=duration_seconds,
895+
custom_llm_provider=custom_llm_provider
896+
)
897+
# Fallback to default video cost calculation if no duration available
898+
return default_video_cost_calculator(
899+
model=model,
900+
duration_seconds=0.0, # Default to 0 if no duration available
901+
custom_llm_provider=custom_llm_provider
902+
)
880903
elif (
881904
call_type == CallTypes.speech.value
882905
or call_type == CallTypes.aspeech.value
@@ -1344,6 +1367,80 @@ def default_image_cost_calculator(
13441367
return cost_info["input_cost_per_pixel"] * height * width * n
13451368

13461369

1370+
def default_video_cost_calculator(
1371+
model: str,
1372+
duration_seconds: float,
1373+
custom_llm_provider: Optional[str] = None,
1374+
) -> float:
1375+
"""
1376+
Default video cost calculator for video generation
1377+
1378+
Args:
1379+
model (str): Model name
1380+
duration_seconds (float): Duration of the generated video in seconds
1381+
custom_llm_provider (Optional[str]): Custom LLM provider
1382+
1383+
Returns:
1384+
float: Cost in USD for the video generation
1385+
1386+
Raises:
1387+
Exception: If model pricing not found in cost map
1388+
"""
1389+
# Build model names for cost lookup
1390+
base_model_name = model
1391+
model_name_without_custom_llm_provider: Optional[str] = None
1392+
if custom_llm_provider and model.startswith(f"{custom_llm_provider}/"):
1393+
model_name_without_custom_llm_provider = model.replace(
1394+
f"{custom_llm_provider}/", ""
1395+
)
1396+
base_model_name = f"{custom_llm_provider}/{model_name_without_custom_llm_provider}"
1397+
1398+
verbose_logger.debug(
1399+
f"Looking up cost for video model: {base_model_name}"
1400+
)
1401+
1402+
model_without_provider = model.split('/')[-1]
1403+
1404+
# Try model with provider first, fall back to base model name
1405+
cost_info: Optional[dict] = None
1406+
models_to_check: List[Optional[str]] = [
1407+
base_model_name,
1408+
model,
1409+
model_without_provider,
1410+
model_name_without_custom_llm_provider,
1411+
]
1412+
for _model in models_to_check:
1413+
if _model is not None and _model in litellm.model_cost:
1414+
cost_info = litellm.model_cost[_model]
1415+
break
1416+
1417+
# If still not found, try with custom_llm_provider prefix
1418+
if cost_info is None and custom_llm_provider:
1419+
prefixed_model = f"{custom_llm_provider}/{model}"
1420+
if prefixed_model in litellm.model_cost:
1421+
cost_info = litellm.model_cost[prefixed_model]
1422+
if cost_info is None:
1423+
raise Exception(
1424+
f"Model not found in cost map. Tried checking {models_to_check}"
1425+
)
1426+
1427+
# Check for video-specific cost per second first
1428+
video_cost_per_second = cost_info.get("output_cost_per_video_per_second")
1429+
if video_cost_per_second is not None:
1430+
return video_cost_per_second * duration_seconds
1431+
1432+
# Fallback to general output cost per second
1433+
output_cost_per_second = cost_info.get("output_cost_per_second")
1434+
if output_cost_per_second is not None:
1435+
return output_cost_per_second * duration_seconds
1436+
1437+
# If no cost information found, return 0
1438+
verbose_logger.info(
1439+
f"No cost information found for video model {model}. Please add pricing to model_prices_and_context_window.json"
1440+
)
1441+
return 0.0
1442+
1443+
13471444
def batch_cost_calculator(
13481445
usage: Usage,
13491446
model: str,

litellm/litellm_core_utils/get_llm_provider_logic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ def get_llm_provider( # noqa: PLR0915
279279
or "ft:gpt-3.5-turbo" in model
280280
or "ft:gpt-4" in model # catches ft:gpt-4-0613, ft:gpt-4o
281281
or model in litellm.openai_image_generation_models
282+
or model in litellm.openai_video_generation_models
282283
):
283284
custom_llm_provider = "openai"
284285
elif model in litellm.open_ai_text_completion_models:

0 commit comments

Comments
 (0)