Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Add this to your `.pre-commit-config.yaml`
#### `check-added-large-files`
Prevent giant files from being committed.
- Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB).
- Optionally exclude files matching a comma-separated list of glob-like patterns with `args: ['--exclude=uv.lock,examples/*ipynb']`.
- Limits checked files to those indicated as staged for addition by git.
- If `git-lfs` is installed, lfs files will be skipped
(requires `git-lfs>=2.2.1`)
Expand Down
13 changes: 12 additions & 1 deletion pre_commit_hooks/check_added_large_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import subprocess
from collections.abc import Sequence
from fnmatch import fnmatch

from pre_commit_hooks.util import added_files
from pre_commit_hooks.util import zsplit
Expand Down Expand Up @@ -34,12 +35,17 @@ def find_large_added_files(
filenames: Sequence[str],
maxkb: int,
*,
exclude: list[str] | None = None,
enforce_all: bool = False,
) -> int:
# Find all added files that are also in the list of files pre-commit tells
# us about
retv = 0
filenames_filtered = set(filenames)
exclude = [] if not exclude else exclude
filenames_filtered = {
fname for fname in filenames
if not any(fnmatch(fname, pat) for pat in exclude)
}
filter_lfs_files(filenames_filtered)

if not enforce_all:
Expand Down Expand Up @@ -68,12 +74,17 @@ def main(argv: Sequence[str] | None = None) -> int:
'--maxkb', type=int, default=500,
help='Maximum allowable KB for added files',
)
parser.add_argument(
'--exclude', type=str, default='',
help='Comma-separated list of glob-style patterns to be excluded',
)
args = parser.parse_args(argv)

return find_large_added_files(
args.filenames,
args.maxkb,
enforce_all=args.enforce_all,
exclude=args.exclude.split(','),
)


Expand Down
17 changes: 17 additions & 0 deletions tests/check_added_large_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ def test_add_something_giant(temp_git_dir):
assert find_large_added_files(['f.py'], 10) == 0


def test_use_exclude(temp_git_dir):
    """Files matching an ``exclude`` pattern are left out of the size check."""
    oversized = ('uv.lock', 'big.baddie')
    with temp_git_dir.as_cwd():
        # Write both files first (each far above the 1 KB limit passed
        # below), then stage them for addition.
        for name in oversized:
            temp_git_dir.join(name).write('a' * 10_000)
        for name in oversized:
            cmd_output('git', 'add', name)

        # big.baddie does not match the pattern, so the check still fails.
        result = find_large_added_files(
            ['uv.lock', 'big.baddie'], 1, exclude=['*.lock'],
        )
        assert result == 1

        # With the only candidate excluded the check passes, both for a
        # wildcard pattern and for an exact file name.
        for pattern in ('*.lock', 'uv.lock'):
            result = find_large_added_files(
                ['uv.lock'], 1, exclude=[pattern],
            )
            assert result == 0


def test_enforce_all(temp_git_dir):
with temp_git_dir.as_cwd():
temp_git_dir.join('f.py').write('a' * 10000)
Expand Down