meta-pytorch
diff --git a/‎.github/workflows/linux_wheel.yaml‎
Lines changed: 6 additions & 2 deletions b/‎.github/workflows/linux_wheel.yaml‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎.github/workflows/reference_resources.yaml‎
Lines changed: 44 additions & 2 deletions b/‎.github/workflows/reference_resources.yaml‎
Lines changed: 44 additions & 2 deletions
diff --git a/‎.github/workflows/windows_wheel.yaml‎
Lines changed: 6 additions & 3 deletions b/‎.github/workflows/windows_wheel.yaml‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 5 additions & 3 deletions b/‎README.md‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎benchmarks/decoders/benchmark_transforms.py‎
Lines changed: 164 additions & 0 deletions b/‎benchmarks/decoders/benchmark_transforms.py‎
Lines changed: 164 additions & 0 deletions
diff --git a/‎docs/source/index.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/index.rst‎
Lines changed: 1 addition & 1 deletion
@@ -72,10 +72,14 @@ jobs:
           name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
           path: pytorch/torchcodec/dist/
       - name: Setup conda env
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v3
         with:
           auto-update-conda: true
-          miniconda-version: "latest"
+          # Using miniforge instead of miniconda ensures that the default
+          # conda channel is conda-forge instead of main/default. This ensures
+          # ABI consistency between dependencies:
+          # https://conda-forge.org/docs/user/transitioning_from_defaults/
+          miniforge-version: latest
           activate-environment: test
           python-version: ${{ matrix.python-version }}
       - name: Update pip
 
@@ -14,14 +14,51 @@ defaults:
     shell: bash -l -eo pipefail {0}
 
 jobs:
+  # Ask the reusable matrix-generation workflow from pytorch/test-infra for a
+  # Linux wheel build matrix, with the XPU, ROCm and CUDA variants disabled.
+  generate-matrix:
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: wheel
+      os: linux
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      with-xpu: disable
+      with-rocm: disable
+      with-cuda: disable
+      build-python-only: "disable"
+
+  # Build the torchcodec Linux wheel through the reusable build workflow from
+  # pytorch/test-infra, driven by the matrix emitted by generate-matrix.
+  build:
+    needs: generate-matrix
+    strategy:
+      fail-fast: false
+    name: Build and Upload Linux wheel
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+    with:
+      repository: meta-pytorch/torchcodec
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      # Matrix JSON comes from the generate-matrix job's `matrix` output.
+      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
+      post-script: packaging/post_build_script.sh
+      smoke-test-script: packaging/fake_smoke_test.py
+      package-name: torchcodec
+      trigger-event: ${{ github.event_name }}
+      build-platform: "python-build-package"
+      build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m build --wheel -vvv --no-isolation"
+
   test-reference-resource-generation:
+    needs: build
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
         python-version: ['3.10']
         ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
     steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
+          path: pytorch/torchcodec/dist/
       - name: Setup conda env
         uses: conda-incubator/setup-miniconda@v2
         with:
@@ -43,11 +80,16 @@ jobs:
           # Note that we're installing stable - this is for running a script where we're a normal PyTorch
           # user, not for building TorchCodec.
           python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
-          python -m pip install numpy pillow
+          python -m pip install numpy pillow pytest
 
+      - name: Install torchcodec from the wheel
+        run: |
+          wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
+          echo Installing $wheel_path
+          python -m pip install $wheel_path -vvv
       - name: Check out repo
         uses: actions/checkout@v3
 
       - name: Run generation reference resources
         run: |
-          python test/generate_reference_resources.py
+          python -m test.generate_reference_resources
@@ -71,8 +71,7 @@ jobs:
         # TODO: FFmpeg 5 on Windows segfaults in avcodec_open2() when passing
         # bad parameters.
         # See https://github.com/pytorch/torchcodec/pull/806
-        # TODO: Support FFmpeg 8 on Windows
-        ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
+        ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0']
     needs: build
     steps:
       - uses: actions/download-artifact@v4
@@ -83,7 +82,11 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           auto-update-conda: true
-          miniconda-version: "latest"
+          # Using miniforge instead of miniconda ensures that the default
+          # conda channel is conda-forge instead of main/default. This ensures
+          # ABI consistency between dependencies:
+          # https://conda-forge.org/docs/user/transitioning_from_defaults/
+          miniforge-version: latest
           activate-environment: test
           python-version: ${{ matrix.python-version }}
       - name: Update pip
 
@@ -107,8 +107,8 @@ ffmpeg -f lavfi -i \
    `torch` and `torchcodec`.
 
 2. Install FFmpeg, if it's not already installed. Linux distributions usually
-   come with FFmpeg pre-installed. TorchCodec supports all major FFmpeg versions
-   in [4, 7].
+   come with FFmpeg pre-installed. TorchCodec supports major FFmpeg versions
+   in [4, 7] on all platforms, and FFmpeg version 8 is supported on Mac and Linux.
 
    If FFmpeg is not already installed, or you need a more recent version, an
    easy way to install it is to use `conda`:
@@ -131,6 +131,7 @@ The following table indicates the compatibility between versions of
 | `torchcodec`       | `torch`            | Python              |
 | ------------------ | ------------------ | ------------------- |
 | `main` / `nightly` | `main` / `nightly` | `>=3.10`, `<=3.13`   |
+| `0.8`              | `2.9`              | `>=3.10`, `<=3.13`   |
 | `0.7`              | `2.8`              | `>=3.9`, `<=3.13`   |
 | `0.6`              | `2.8`              | `>=3.9`, `<=3.13`   |
 | `0.5`              | `2.7`              | `>=3.9`, `<=3.13`   |
@@ -147,7 +148,8 @@ format you want. Refer to Nvidia's GPU support matrix for more details
 [here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
 
 1. Install FFmpeg with NVDEC support.
-   TorchCodec with CUDA should work with FFmpeg versions in [4, 7].
+   TorchCodec with CUDA should work with FFmpeg versions in [4, 7] on all platforms,
+   and FFmpeg version 8 is supported on Linux.
 
    If FFmpeg is not already installed, or you need a more recent version, an
    easy way to install it is to use `conda`:
 
@@ -0,0 +1,164 @@
+import math
+from argparse import ArgumentParser
+from pathlib import Path
+from time import perf_counter_ns
+
+import torch
+from torch import Tensor
+from torchcodec._core import add_video_stream, create_from_file, get_frames_by_pts
+from torchcodec.decoders import VideoDecoder
+from torchvision.transforms import v2
+
+DEFAULT_NUM_EXP = 20
+
+
+def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1) -> Tensor:
+    """Time repeated calls of ``f(*args)``.
+
+    Args:
+        f: Callable to time.
+        *args: Positional arguments forwarded to ``f`` on every call.
+        num_exp: Number of timed calls.
+        warmup: Number of untimed calls made before timing starts.
+
+    Returns:
+        A float tensor holding one elapsed time per timed call, in
+        nanoseconds (measured with ``perf_counter_ns``).
+    """
+
+    # Untimed warmup calls, so one-off startup costs are not measured.
+    for _ in range(warmup):
+        f(*args)
+
+    times = []
+    for _ in range(num_exp):
+        start = perf_counter_ns()
+        f(*args)
+        end = perf_counter_ns()
+        times.append(end - start)
+    return torch.tensor(times).float()
+
+
+def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> None:
+    """Print median, mean, standard deviation, min and max of ``times``.
+
+    Args:
+        times: Timings, assumed to be in nanoseconds (the "ns" unit maps to a
+            multiplier of 1 in the table below).
+        unit: Unit to report in: "ns", "µs", "ms" or "s". Any other value
+            raises ``KeyError`` from the lookup.
+        prefix: Label printed before the statistics, left-aligned and padded
+            to 45 characters.
+
+    Returns:
+        None. The statistics are only printed.
+    """
+    mul = {
+        "ns": 1,
+        "µs": 1e-3,
+        "ms": 1e-6,
+        "s": 1e-9,
+    }[unit]
+    times = times * mul
+    std = times.std().item()
+    med = times.median().item()
+    mean = times.mean().item()
+    # ``min`` and ``max`` shadow the builtins within this function; the names
+    # are part of the output because the ``{min = ...}`` specifiers print them.
+    min = times.min().item()
+    max = times.max().item()
+    print(
+        f"{prefix:<45} {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min = :.2f}, {max = :.2f} - in {unit}"
+    )
+
+
+def torchvision_resize(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int]
+) -> Tensor:
+    """Decode frames at the given timestamps, then resize them with torchvision.
+
+    Args:
+        path: Video file to decode.
+        pts_seconds: Timestamps passed to ``get_frames_by_pts``.
+        dims: Target size handed to ``v2.functional.resize`` as ``size``.
+
+    Returns:
+        The result of ``v2.functional.resize`` on the decoded frames.
+    """
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder)
+    # Only the first returned element (the frames) is used; the rest is dropped.
+    raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
+    return v2.functional.resize(raw_frames, size=dims)
+
+
+def torchvision_crop(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
+) -> Tensor:
+    """Decode frames at the given timestamps, then crop them with torchvision.
+
+    Args:
+        path: Video file to decode.
+        pts_seconds: Timestamps passed to ``get_frames_by_pts``.
+        dims: Crop size as ``(height, width)``.
+        x: Left offset of the crop window.
+        y: Top offset of the crop window.
+
+    Returns:
+        The result of ``v2.functional.crop`` on the decoded frames.
+    """
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder)
+    # Only the first returned element (the frames) is used; the rest is dropped.
+    raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
+    return v2.functional.crop(raw_frames, top=y, left=x, height=dims[0], width=dims[1])
+
+
+def decoder_native_resize(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int]
+) -> Tensor:
+    """Decode frames at the given timestamps with a resize requested from the decoder.
+
+    Args:
+        path: Video file to decode.
+        pts_seconds: Timestamps passed to ``get_frames_by_pts``.
+        dims: Two integers written into the ``"resize, <dims[0]>, <dims[1]>"``
+            transform spec (presumably height then width; confirm against the
+            decoder's spec format).
+
+    Returns:
+        The first element returned by ``get_frames_by_pts``.
+    """
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder, transform_specs=f"resize, {dims[0]}, {dims[1]}")
+    return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
+
+
+def decoder_native_crop(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
+) -> Tensor:
+    """Decode frames at the given timestamps with a crop requested from the decoder.
+
+    Args:
+        path: Video file to decode.
+        pts_seconds: Timestamps passed to ``get_frames_by_pts``.
+        dims: Two integers written into the transform spec after ``crop``
+            (presumably height then width; confirm against the decoder's
+            spec format).
+        x: Third value of the crop spec.
+        y: Fourth value of the crop spec.
+
+    Returns:
+        The first element returned by ``get_frames_by_pts``.
+    """
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder, transform_specs=f"crop, {dims[0]}, {dims[1]}, {x}, {y}")
+    return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
+
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument("--path", type=str, help="path to file", required=True)
+    parser.add_argument(
+        "--num-exp",
+        type=int,
+        default=DEFAULT_NUM_EXP,
+        help="number of runs to average over",
+    )
+
+    args = parser.parse_args()
+    path = Path(args.path)
+
+    metadata = VideoDecoder(path).metadata
+    duration = metadata.duration_seconds
+
+    print(
+        f"Benchmarking {path.name}, duration: {duration}, codec: {metadata.codec}, averaging over {args.num_exp} runs:"
+    )
+
+    input_height = metadata.height
+    input_width = metadata.width
+    fraction_of_total_frames_to_sample = [0.005, 0.01, 0.05, 0.1]
+    fraction_of_input_dimensions = [0.5, 0.25, 0.125]
+
+    for num_fraction in fraction_of_total_frames_to_sample:
+        num_frames_to_sample = math.ceil(metadata.num_frames * num_fraction)
+        print(
+            f"Sampling {num_fraction * 100}%, {num_frames_to_sample}, of {metadata.num_frames} frames"
+        )
+        uniform_timestamps = [
+            i * duration / num_frames_to_sample for i in range(num_frames_to_sample)
+        ]
+
+        for dims_fraction in fraction_of_input_dimensions:
+            dims = (int(input_height * dims_fraction), int(input_width * dims_fraction))
+
+            times = bench(
+                torchvision_resize, path, uniform_timestamps, dims, num_exp=args.num_exp
+            )
+            report_stats(times, prefix=f"torchvision_resize({dims})")
+
+            times = bench(
+                decoder_native_resize,
+                path,
+                uniform_timestamps,
+                dims,
+                num_exp=args.num_exp,
+            )
+            report_stats(times, prefix=f"decoder_native_resize({dims})")
+            print()
+
+            center_x = (input_height - dims[0]) // 2
+            center_y = (input_width - dims[1]) // 2
+            times = bench(
+                torchvision_crop,
+                path,
+                uniform_timestamps,
+                dims,
+                center_x,
+                center_y,
+                num_exp=args.num_exp,
+            )
+            report_stats(
+                times, prefix=f"torchvision_crop({dims}, {center_x}, {center_y})"
+            )
+
+            times = bench(
+                decoder_native_crop,
+                path,
+                uniform_timestamps,
+                dims,
+                center_x,
+                center_y,
+                num_exp=args.num_exp,
+            )
+            report_stats(
+                times, prefix=f"decoder_native_crop({dims}, {center_x}, {center_y})"
+            )
+            print()
+
+
+if __name__ == "__main__":
+    main()
@@ -11,7 +11,7 @@ We achieve these capabilities through:
 
 * Pythonic APIs that mirror Python and PyTorch conventions.
 * Relying on `FFmpeg <https://www.ffmpeg.org/>`_ to do the decoding / encoding.
-  TorchCodec uses the version of FFmpeg you already have installed. FMPEG is a
+  TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a
   mature library with broad coverage available on most systems. It is, however,
   not easy to use.  TorchCodec abstracts FFmpeg's complexity to ensure it is
   used correctly and efficiently.