diff --git a/README.md b/README.md index 9ee16774..75e272e3 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Add this to your `.pre-commit-config.yaml` #### `check-added-large-files` Prevent giant files from being committed. - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB). + - Optionally exclude files matching a comma-separated list of glob-style patterns with `args: ['--exclude=uv.lock,examples/*ipynb']` (matched with `fnmatch`, so `*` also matches `/`). - Limits checked files to those indicated as staged for addition by git. - If `git-lfs` is installed, lfs files will be skipped (requires `git-lfs>=2.2.1`) diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py index e6741623..9fb0be06 100644 --- a/pre_commit_hooks/check_added_large_files.py +++ b/pre_commit_hooks/check_added_large_files.py @@ -5,6 +5,7 @@ import os import subprocess from collections.abc import Sequence +from fnmatch import fnmatch from pre_commit_hooks.util import added_files from pre_commit_hooks.util import zsplit @@ -34,12 +35,17 @@ def find_large_added_files( filenames: Sequence[str], maxkb: int, *, + exclude: list[str] | None = None, enforce_all: bool = False, ) -> int: # Find all added files that are also in the list of files pre-commit tells # us about retv = 0 - filenames_filtered = set(filenames) + exclude = [] if not exclude else exclude + filenames_filtered = { + fname for fname in filenames + if not any(fnmatch(fname, pat) for pat in exclude) + } filter_lfs_files(filenames_filtered) if not enforce_all: @@ -68,12 +74,17 @@ def main(argv: Sequence[str] | None = None) -> int: '--maxkb', type=int, default=500, help='Maximum allowable KB for added files', ) + parser.add_argument( + '--exclude', type=str, default='', + help='Comma-separated list of glob-style patterns to be excluded', + ) args = parser.parse_args(argv) return find_large_added_files( args.filenames, args.maxkb, enforce_all=args.enforce_all, + exclude=args.exclude.split(','), ) diff --git a/tests/check_added_large_files_test.py 
b/tests/check_added_large_files_test.py index 54c4e689..3c0dbe0d 100644 --- a/tests/check_added_large_files_test.py +++ b/tests/check_added_large_files_test.py @@ -43,6 +43,23 @@ def test_add_something_giant(temp_git_dir): assert find_large_added_files(['f.py'], 10) == 0 +def test_use_exclude(temp_git_dir): + with temp_git_dir.as_cwd(): + temp_git_dir.join('uv.lock').write('a' * 10_000) + temp_git_dir.join('big.baddie').write('a' * 10_000) + + cmd_output('git', 'add', 'uv.lock') + cmd_output('git', 'add', 'big.baddie') + + # should fail because big.baddie is not excluded + assert find_large_added_files( + ['uv.lock', 'big.baddie'], 1, exclude=['*.lock'], + ) == 1 + # should pass when all files are excluded, via both a wildcard and an exact match + assert find_large_added_files(['uv.lock'], 1, exclude=['*.lock']) == 0 + assert find_large_added_files(['uv.lock'], 1, exclude=['uv.lock']) == 0 + + def test_enforce_all(temp_git_dir): with temp_git_dir.as_cwd(): temp_git_dir.join('f.py').write('a' * 10000)