docs/my-website/docs/providers/openai.md (7 additions, 1 deletion)
@@ -836,4 +836,10 @@
 response = completion(
     model="gpt-5-pro",
     messages=[{"role": "user", "content": "Solve this complex reasoning problem..."}]
 )
 ```
+
+## Video Generation
+
+LiteLLM supports OpenAI's video generation models, including Sora.
+
+For detailed documentation on video generation, see [OpenAI Video Generation →](./openai/video_generation.md).
docs/my-website/docs/providers/openai/video_generation.md (143 additions, new file)

@@ -0,0 +1,143 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# OpenAI Video Generation

LiteLLM supports OpenAI's video generation models, including Sora.

## Quick Start

### Required API Keys

```python
import os
os.environ["OPENAI_API_KEY"] = "your-api-key"
```

### Basic Usage

```python
from litellm import video_generation, video_retrieval
import os

os.environ["OPENAI_API_KEY"] = "your-api-key"

# Generate a video
response = video_generation(
    prompt="A cat playing with a ball of yarn in a sunny garden",
    model="sora-2",
    seconds="8",
    size="720x1280"
)

print(f"Video ID: {response.id}")
print(f"Status: {response.status}")

# Download video content when ready
video_bytes = video_retrieval(
    video_id=response.id,
    model="sora-2"
)

# Save to file
with open("generated_video.mp4", "wb") as f:
    f.write(video_bytes)
```
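
LiteLLM's other endpoints expose async variants with an `a` prefix (for example, `acompletion`), and the cost-tracking code in this PR registers an `acreate_video` call type. Assuming the videos module follows the same naming convention (an assumption worth verifying against `litellm.videos.main`), an async call might look like:

```python
import asyncio
import litellm

async def main():
    # Assumption: async video generation follows litellm's `a`-prefix convention
    response = await litellm.avideo_generation(
        prompt="A paper boat drifting down a rainy street",
        model="sora-2",
        seconds="8",
    )
    print(response.id, response.status)

asyncio.run(main())
```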

## Supported Models

| Model Name | Description | Max Duration | Supported Sizes |
|------------|-------------|--------------|-----------------|
| sora-2 | OpenAI's latest video generation model | 8 seconds | 720x1280, 1280x720 |

## Video Generation Parameters

- `prompt` (required): Text description of the desired video
- `model` (optional): Model to use, defaults to "sora-2"
- `seconds` (optional): Video duration in seconds, passed as a string (e.g., "8"); see the model table above for supported durations
- `size` (optional): Video dimensions (e.g., "720x1280", "1280x720")
- `input_reference` (optional): Reference image for video editing
- `user` (optional): User identifier for tracking
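
For example, a request that sets the optional parameters explicitly might look like this (a sketch using only the parameters listed above; all values are illustrative):

```python
from litellm import video_generation

response = video_generation(
    prompt="A timelapse of clouds rolling over a mountain ridge",
    model="sora-2",    # default model if omitted
    seconds="8",       # duration, passed as a string
    size="1280x720",   # landscape orientation
    user="user-1234",  # optional identifier for usage tracking
)
print(response.id)
```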

## Video Content Retrieval

```python
# Download video content
video_bytes = video_retrieval(
    video_id="video_1234567890",
    model="sora-2"
)

# Save to file
with open("video.mp4", "wb") as f:
    f.write(video_bytes)
```

## Complete Workflow

```python
import litellm
import time

def generate_and_download_video(prompt):
    # Step 1: Generate video
    response = litellm.video_generation(
        prompt=prompt,
        model="sora-2",
        seconds="8",
        size="720x1280"
    )

    video_id = response.id
    print(f"Video ID: {video_id}")

    # Step 2: Wait for processing (in practice, poll status; see the sketch below)
    time.sleep(30)

    # Step 3: Download video
    video_bytes = litellm.video_retrieval(
        video_id=video_id,
        model="sora-2"
    )

    # Step 4: Save to file
    with open(f"video_{video_id}.mp4", "wb") as f:
        f.write(video_bytes)

    return f"video_{video_id}.mp4"

# Usage
video_file = generate_and_download_video(
    "A cat playing with a ball of yarn in a sunny garden"
)
```
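
In production, replace the fixed `time.sleep` with a polling loop. Using only the calls documented above, one hedged approach is to retry the download until it succeeds (this assumes retrieval fails while the video is still processing; adapt the exception handling to the errors your deployment actually raises):

```python
import time
import litellm

def download_when_ready(video_id, timeout=300, interval=10):
    """Retry video_retrieval until the video is ready or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # Assumption: retrieval raises while the video is still processing
            return litellm.video_retrieval(video_id=video_id, model="sora-2")
        except Exception:
            time.sleep(interval)
    raise TimeoutError(f"Video {video_id} was not ready after {timeout} seconds")
```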

## Video Editing with Reference Images

```python
# Video editing with reference image
response = litellm.video_generation(
    prompt="Make the cat jump higher",
    input_reference="path/to/image.jpg",  # Reference image
    model="sora-2",
    seconds="8"
)

print(f"Video ID: {response.id}")
```

## Error Handling

```python
from litellm import video_generation
from litellm.exceptions import BadRequestError, AuthenticationError

try:
    response = video_generation(
        prompt="A cat playing with a ball of yarn",
        model="sora-2"
    )
except AuthenticationError as e:
    print(f"Authentication failed: {e}")
except BadRequestError as e:
    print(f"Bad request: {e}")
```
litellm/__init__.py (5 additions, 3 deletions)

@@ -104,7 +104,7 @@
 # Register async client cleanup to prevent resource leaks
 register_async_client_cleanup()
 ####################################################
-if set_verbose == True:
+if set_verbose:
     _turn_on_debug()
 ####################################################
 ### Callbacks /Logging / Success / Failure Handlers #####
@@ -980,6 +980,9 @@ def add_known_models():
 ####### IMAGE GENERATION MODELS ###################
 openai_image_generation_models = ["dall-e-2", "dall-e-3"]

+####### VIDEO GENERATION MODELS ###################
+openai_video_generation_models = ["sora-2"]
+
 from .timeout import timeout
 from .cost_calculator import completion_cost
 from litellm.litellm_core_utils.litellm_logging import Logging, modify_integration
@@ -1206,7 +1209,6 @@ def add_known_models():
     OpenAIOSeriesConfig,
 )

-from .llms.snowflake.chat.transformation import SnowflakeConfig
 from .llms.gradient_ai.chat.transformation import GradientAIConfig

 openaiOSeriesConfig = OpenAIOSeriesConfig()
@@ -1242,7 +1244,6 @@ def add_known_models():
 from .llms.baseten.chat import BasetenConfig
 from .llms.sambanova.chat import SambanovaConfig
 from .llms.sambanova.embedding.transformation import SambaNovaEmbeddingConfig
-from .llms.ai21.chat.transformation import AI21ChatConfig
 from .llms.fireworks_ai.chat.transformation import FireworksAIConfig
 from .llms.fireworks_ai.completion.transformation import FireworksAITextCompletionConfig
 from .llms.fireworks_ai.audio_transcription.transformation import (
@@ -1329,6 +1330,7 @@ def add_known_models():
 from .assistants.main import *
 from .batches.main import *
 from .images.main import *
+from .videos.main import *
 from .batch_completion.main import *  # type: ignore
 from .rerank_api.main import *
 from .llms.anthropic.experimental_pass_through.messages.handler import *
litellm/constants.py (1 addition, 0 deletions)

@@ -265,6 +265,7 @@
"high": 10,
}
DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2"
DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2"

LITELLM_CHAT_PROVIDERS = [
"openai",
Expand Down
litellm/cost_calculator.py (97 additions, 0 deletions)

@@ -877,6 +877,29 @@ def completion_cost(  # noqa: PLR0915
             size=size,
             optional_params=optional_params,
         )
+    elif (
+        call_type == CallTypes.create_video.value
+        or call_type == CallTypes.acreate_video.value
+    ):
+        ### VIDEO GENERATION COST CALCULATION ###
+        if completion_response is not None and hasattr(completion_response, "usage"):
+            usage_obj = completion_response.usage
+            duration_seconds = usage_obj.get("duration_seconds")
+
+            if duration_seconds is not None:
+                # Calculate cost from video duration via the video-specific helper
+                from litellm.llms.openai.cost_calculation import video_generation_cost
+
+                return video_generation_cost(
+                    model=model,
+                    duration_seconds=duration_seconds,
+                    custom_llm_provider=custom_llm_provider,
+                )
+        # Fall back to the default video cost calculation if no duration is available
+        return default_video_cost_calculator(
+            model=model,
+            duration_seconds=0.0,  # default to 0 if no duration available
+            custom_llm_provider=custom_llm_provider,
+        )
     elif (
         call_type == CallTypes.speech.value
         or call_type == CallTypes.aspeech.value
@@ -1344,6 +1367,80 @@ def default_image_cost_calculator(
return cost_info["input_cost_per_pixel"] * height * width * n


def default_video_cost_calculator(
model: str,
duration_seconds: float,
custom_llm_provider: Optional[str] = None,
) -> float:
"""
Default video cost calculator for video generation

Args:
model (str): Model name
duration_seconds (float): Duration of the generated video in seconds
custom_llm_provider (Optional[str]): Custom LLM provider

Returns:
float: Cost in USD for the video generation

Raises:
Exception: If model pricing not found in cost map
"""
# Build model names for cost lookup
base_model_name = model
model_name_without_custom_llm_provider: Optional[str] = None
if custom_llm_provider and model.startswith(f"{custom_llm_provider}/"):
model_name_without_custom_llm_provider = model.replace(
f"{custom_llm_provider}/", ""
)
base_model_name = f"{custom_llm_provider}/{model_name_without_custom_llm_provider}"

verbose_logger.debug(
f"Looking up cost for video model: {base_model_name}"
)

model_without_provider = model.split('/')[-1]

# Try model with provider first, fall back to base model name
cost_info: Optional[dict] = None
models_to_check: List[Optional[str]] = [
base_model_name,
model,
model_without_provider,
model_name_without_custom_llm_provider,
]
for _model in models_to_check:
if _model is not None and _model in litellm.model_cost:
cost_info = litellm.model_cost[_model]
break

# If still not found, try with custom_llm_provider prefix
if cost_info is None and custom_llm_provider:
prefixed_model = f"{custom_llm_provider}/{model}"
if prefixed_model in litellm.model_cost:
cost_info = litellm.model_cost[prefixed_model]
if cost_info is None:
raise Exception(
f"Model not found in cost map. Tried checking {models_to_check}"
)

# Check for video-specific cost per second first
video_cost_per_second = cost_info.get("output_cost_per_video_per_second")
if video_cost_per_second is not None:
return video_cost_per_second * duration_seconds

# Fallback to general output cost per second
output_cost_per_second = cost_info.get("output_cost_per_second")
if output_cost_per_second is not None:
return output_cost_per_second * duration_seconds

# If no cost information found, return 0
verbose_logger.info(
f"No cost information found for video model {model}. Please add pricing to model_prices_and_context_window.json"
)
return 0.0


def batch_cost_calculator(
usage: Usage,
model: str,
Expand Down
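
To make the pricing branches concrete: with an illustrative cost-map entry of $0.10 per video-second for `sora-2` (a made-up figure; real prices belong in `model_prices_and_context_window.json`), an 8-second clip would be priced like so:

```python
# Illustrative entry only -- not OpenAI's actual pricing
cost_info = {"output_cost_per_video_per_second": 0.10}
duration_seconds = 8.0

# Mirrors the first pricing branch of default_video_cost_calculator
cost = cost_info["output_cost_per_video_per_second"] * duration_seconds
print(f"${cost:.2f}")  # $0.80
```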
litellm/litellm_core_utils/get_llm_provider_logic.py (1 addition, 0 deletions)

@@ -279,6 +279,7 @@ def get_llm_provider(  # noqa: PLR0915
or "ft:gpt-3.5-turbo" in model
or "ft:gpt-4" in model # catches ft:gpt-4-0613, ft:gpt-4o
or model in litellm.openai_image_generation_models
or model in litellm.openai_video_generation_models
):
custom_llm_provider = "openai"
elif model in litellm.open_ai_text_completion_models:
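
The effect of this one-line change can be sanity-checked with `get_llm_provider` (a sketch; the four-tuple return shape matches litellm's public `get_llm_provider`, but verify against your installed version):

```python
from litellm import get_llm_provider

# "sora-2" now resolves to OpenAI without an explicit "openai/" prefix,
# because it appears in litellm.openai_video_generation_models.
model, provider, _, _ = get_llm_provider(model="sora-2")
print(provider)  # "openai"
```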