Skip to content

Commit 1d03c45

Browse files
committed
feat: add .ts_ignore pattern ignoring system
1 parent da18bd0 commit 1d03c45

File tree

9 files changed

+245
-53
lines changed

9 files changed

+245
-53
lines changed

src/tagstudio/core/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
TS_FOLDER_NAME: str = ".TagStudio"
1010
BACKUP_FOLDER_NAME: str = "backups"
1111
COLLAGE_FOLDER_NAME: str = "collages"
12+
IGNORE_NAME: str = ".ts_ignore"
1213
THUMB_CACHE_NAME: str = "thumbs"
1314

1415
FONT_SAMPLE_TEXT: str = (

src/tagstudio/core/library/alchemy/library.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
ValueType,
8484
)
8585
from tagstudio.core.library.alchemy.visitors import SQLBoolExpressionBuilder
86+
from tagstudio.core.library.ignore import Ignore
8687
from tagstudio.core.library.json.library import Library as JsonLibrary
8788
from tagstudio.qt.translations import Translations
8889

@@ -92,6 +93,7 @@
9293

9394
logger = structlog.get_logger(__name__)
9495

96+
9597
TAG_CHILDREN_QUERY = text("""
9698
-- Note for this entire query that tag_parents.child_id is the parent id and tag_parents.parent_id is the child id due to bad naming
9799
WITH RECURSIVE ChildTags AS (
@@ -866,6 +868,7 @@ def search_library(
866868
"""
867869
assert isinstance(search, BrowsingState)
868870
assert self.engine
871+
assert self.library_dir
869872

870873
with Session(self.engine, expire_on_commit=False) as session:
871874
statement = select(Entry)
@@ -878,6 +881,7 @@ def search_library(
878881
f"SQL Expression Builder finished ({format_timespan(end_time - start_time)})"
879882
)
880883

884+
# TODO: Convert old extension lists to new .ts_ignore format
881885
extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)
882886
is_exclude_list = self.prefs(LibraryPrefs.IS_EXCLUDE_LIST)
883887

@@ -887,11 +891,37 @@ def search_library(
887891
statement = statement.where(Entry.suffix.in_(extensions))
888892

889893
statement = statement.distinct(Entry.id)
894+
ignore_patterns: list[str] = Ignore.get_patterns(self.library_dir)
895+
896+
# Add glob pattern filters with exclusion patterns allowing for overrides.
897+
statement = statement.filter(
898+
and_(
899+
or_(
900+
or_(
901+
*[
902+
Entry.path.op("GLOB")(p.lstrip("!"))
903+
for p in ignore_patterns
904+
if p.startswith("!")
905+
]
906+
),
907+
and_(
908+
*[
909+
Entry.path.op("NOT GLOB")(p)
910+
for p in ignore_patterns
911+
if not p.startswith("!")
912+
]
913+
),
914+
)
915+
)
916+
)
917+
918+
# TODO: This query will become unnecessary once this method returns unlimited IDs and
919+
# it becomes the frontend's responsibility (once again) to split and display them.
890920
start_time = time.time()
891921
query_count = select(func.count()).select_from(statement.alias("entries"))
892922
count_all: int = session.execute(query_count).scalar() or 0
893923
end_time = time.time()
894-
logger.info(f"finished counting ({format_timespan(end_time - start_time)})")
924+
logger.info(f"[Library] Finished counting ({format_timespan(end_time - start_time)})")
895925

896926
sort_on: ColumnExpressionArgument = Entry.id
897927
match search.sorting_mode:

src/tagstudio/core/library/ignore.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Copyright (C) 2025 Travis Abendshien (CyanVoxel).
2+
# Licensed under the GPL-3.0 License.
3+
# Created for TagStudio: https://github.com/CyanVoxel/TagStudio
4+
5+
from copy import deepcopy
6+
from pathlib import Path
7+
8+
import structlog
9+
from wcmatch import glob, pathlib
10+
11+
from tagstudio.core.constants import IGNORE_NAME, TS_FOLDER_NAME
12+
from tagstudio.core.singleton import Singleton
13+
14+
logger = structlog.get_logger()
15+
16+
PATH_GLOB_FLAGS = glob.GLOBSTARLONG | glob.DOTGLOB | glob.NEGATE | pathlib.MATCHBASE
17+
18+
19+
def _ignore_to_glob(ignore_patterns: list[str]) -> list[str]:
    """Convert .gitignore-like patterns to explicit glob syntax.

    Every pattern is expanded into the variants that .gitignore syntax leaves
    implicit: a "**/"-prefixed form for prefix-less patterns, a form with a
    trailing "/**", "/*" or "/" removed, a form with a leading "**/" or "*/"
    removed, and finally a "/**"-suffixed form of every variant. A leading "!"
    is carried over onto each generated variant.

    Args:
        ignore_patterns (list[str]): The .gitignore-like patterns to convert.
            This list is not modified.

    Returns:
        list[str]: The original patterns plus all generated variants, with
            duplicates removed. First-seen order is kept, so the result is
            identical on every run for the same input.
    """
    expanded: list[str] = list(ignore_patterns)

    # Mimic implicit .gitignore syntax behavior for the SQLite GLOB function.
    for pattern in ignore_patterns:
        # Temporarily remove any exclusion character before processing.
        exclusion_char = "!" if pattern.startswith("!") else ""
        gp = pattern[1:] if exclusion_char else pattern

        if not gp.startswith(("**/", "*/", "/")):
            # Create a version of a prefix-less pattern that starts with "**/".
            gp = "**/" + gp
            expanded.append(exclusion_char + gp)

        gp = gp.removesuffix("/**").removesuffix("/*").removesuffix("/")
        expanded.append(exclusion_char + gp)

        gp = gp.removeprefix("**/").removeprefix("*/")
        expanded.append(exclusion_char + gp)

    # Add "/**" suffix to suffix-less patterns to match implicit .gitignore behavior.
    # The comprehension is fully evaluated before the list is extended, so the
    # list is never mutated while it is being iterated.
    expanded += [
        pattern.removesuffix("/*").removesuffix("/") + "/**"
        for pattern in expanded
        if not pattern.endswith("/**")
    ]

    # dict.fromkeys() removes duplicates while keeping a stable order
    # (a plain set() would reorder the patterns between runs).
    glob_patterns: list[str] = list(dict.fromkeys(expanded))

    logger.info("[Ignore]", glob_patterns=glob_patterns)
    return glob_patterns
61+
62+
63+
# Glob patterns that are applied to every library, independent of the contents
# of the library's own .ts_ignore file. Expanded once, at import time.
GLOBAL_IGNORE = _ignore_to_glob(
    [
        # TagStudio's own data folder
        TS_FOLDER_NAME,
        # Trash / recycle-bin folders
        ".Trash",
        ".Trash-*",
        ".Trashes",
        "$RECYCLE.BIN",
        # OS-generated files and folders
        ".DS_Store",
        ".fseventsd",
        ".Spotlight-V100",
        "._*",
        "System Volume Information",
    ]
)
80+
81+
82+
class Ignore(metaclass=Singleton):
    """Class for processing and managing glob-like ignore file patterns.

    The most recently computed pattern list is cached on the class and is only
    rebuilt when the .ts_ignore path, its modification time, or the
    ``include_global`` flag differs from the previous call.
    """

    # Path and modification time of the .ts_ignore file that produced `_patterns`,
    # or None when the last call found no .ts_ignore file.
    _last_loaded: tuple[Path, float] | None = None
    # Value of `include_global` that was used to build `_patterns`.
    _last_include_global: bool | None = None
    # The cached result of the last call to `get_patterns()`.
    _patterns: list[str] = []

    @staticmethod
    def get_patterns(library_dir: Path, include_global: bool = True) -> list[str]:
        """Get the ignore patterns for the given library directory.

        Args:
            library_dir (Path): The path of the library to load patterns from.
            include_global (bool): Flag for including the global ignore set.
                In most scenarios, this should be True.

        Returns:
            list[str]: The glob patterns to ignore. A new list is returned on
                every call, so callers may modify it freely.
        """
        # Copy so that neither the cache nor callers can alter GLOBAL_IGNORE itself.
        base_patterns: list[str] = list(GLOBAL_IGNORE) if include_global else []
        ts_ignore_path = Path(library_dir) / TS_FOLDER_NAME / IGNORE_NAME

        # Ask for the mtime directly instead of exists() + stat(), so a file that
        # disappears between the two calls cannot raise here.
        try:
            mtime = ts_ignore_path.stat().st_mtime
        except (FileNotFoundError, NotADirectoryError):
            logger.info(
                "[Ignore] No .ts_ignore file found",
                path=ts_ignore_path,
            )
            Ignore._last_loaded = None
            Ignore._last_include_global = None
            Ignore._patterns = base_patterns

            return list(Ignore._patterns)

        # Process the .ts_ignore file if the previous result is non-existent or outdated.
        # A change of `include_global` also invalidates the previous result.
        loaded = (ts_ignore_path, mtime)
        if Ignore._last_loaded != loaded or Ignore._last_include_global != include_global:
            logger.info(
                "[Ignore] Processing the .ts_ignore file...",
                library=library_dir,
                last_mtime=Ignore._last_loaded[1] if Ignore._last_loaded else None,
                new_mtime=loaded[1],
            )
            Ignore._patterns = _ignore_to_glob(
                base_patterns + Ignore._load_ignore_file(ts_ignore_path)
            )
        else:
            logger.info(
                "[Ignore] No updates to the .ts_ignore detected",
                library=library_dir,
                # The cached entry equals `loaded` here, so both mtimes are the same.
                last_mtime=loaded[1],
                new_mtime=loaded[1],
            )
        Ignore._last_loaded = loaded
        Ignore._last_include_global = include_global

        return list(Ignore._patterns)

    @staticmethod
    def _load_ignore_file(path: Path) -> list[str]:
        """Load and process the .ts_ignore file into a list of glob patterns.

        Args:
            path (Path): The path of the .ts_ignore file.

        Returns:
            list[str]: The whitespace-stripped lines of the file, without blank
                lines and without lines starting with "#". Empty if the file
                does not exist.
        """
        try:
            with open(path, encoding="utf8") as f:
                stripped_lines = (line_raw.strip() for line_raw in f)
                # Ignore blank lines and comments
                return [line for line in stripped_lines if line and not line.startswith("#")]
        except (FileNotFoundError, NotADirectoryError):
            return []

src/tagstudio/core/utils/missing_files.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
from pathlib import Path
44

55
import structlog
6+
from wcmatch import pathlib
67

78
from tagstudio.core.library.alchemy.library import Library
89
from tagstudio.core.library.alchemy.models import Entry
9-
from tagstudio.core.utils.refresh_dir import GLOBAL_IGNORE_SET
10+
from tagstudio.core.library.ignore import PATH_GLOB_FLAGS, Ignore
1011

1112
logger = structlog.get_logger()
1213

@@ -25,7 +26,9 @@ def missing_file_entries_count(self) -> int:
2526

2627
def refresh_missing_files(self) -> Iterator[int]:
2728
"""Track the number of entries that point to an invalid filepath."""
29+
assert self.library.library_dir
2830
logger.info("[refresh_missing_files] Refreshing missing files...")
31+
2932
self.missing_file_entries = []
3033
for i, entry in enumerate(self.library.get_entries()):
3134
full_path = self.library.library_dir / entry.path
@@ -38,16 +41,15 @@ def match_missing_file_entry(self, match_entry: Entry) -> list[Path]:
3841
3942
Works if files were just moved to different subfolders and don't have duplicate names.
4043
"""
41-
matches = []
42-
for path in self.library.library_dir.glob(f"**/{match_entry.path.name}"):
43-
# Ensure matched file isn't in a globally ignored folder
44-
skip: bool = False
45-
for part in path.parts:
46-
if part in GLOBAL_IGNORE_SET:
47-
skip = True
48-
break
49-
if skip:
50-
continue
44+
assert self.library.library_dir
45+
matches: list[Path] = []
46+
47+
ignore_patterns = Ignore.get_patterns(self.library.library_dir)
48+
for path in pathlib.Path(str(self.library.library_dir)).glob(
49+
f"***/{match_entry.path.name}",
50+
flags=PATH_GLOB_FLAGS,
51+
exclude=ignore_patterns,
52+
):
5153
if path.name == match_entry.path.name:
5254
new_path = Path(path).relative_to(self.library.library_dir)
5355
matches.append(new_path)

src/tagstudio/core/utils/refresh_dir.py

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,14 @@
55
from time import time
66

77
import structlog
8+
from wcmatch import pathlib
89

9-
from tagstudio.core.constants import TS_FOLDER_NAME
1010
from tagstudio.core.library.alchemy.library import Library
1111
from tagstudio.core.library.alchemy.models import Entry
12+
from tagstudio.core.library.ignore import PATH_GLOB_FLAGS, Ignore
1213

1314
logger = structlog.get_logger(__name__)
1415

15-
GLOBAL_IGNORE_SET: set[str] = set(
16-
[
17-
TS_FOLDER_NAME,
18-
"$RECYCLE.BIN",
19-
".Trashes",
20-
".Trash",
21-
"tagstudio_thumbs",
22-
".fseventsd",
23-
".Spotlight-V100",
24-
"System Volume Information",
25-
".DS_Store",
26-
]
27-
)
28-
2916

3017
@dataclass
3118
class RefreshDirTracker:
@@ -42,7 +29,7 @@ def save_new_files(self):
4229
entries = [
4330
Entry(
4431
path=entry_path,
45-
folder=self.library.folder,
32+
folder=self.library.folder, # pyright: ignore[reportArgumentType]
4633
fields=[],
4734
date_added=dt.now(),
4835
)
@@ -54,7 +41,7 @@ def save_new_files(self):
5441

5542
yield
5643

57-
def refresh_dir(self, lib_path: Path) -> Iterator[int]:
44+
def refresh_dir(self, library_dir: Path) -> Iterator[int]:
5845
"""Scan a directory for files, and add those relative filenames to internal variables."""
5946
if self.library.library_dir is None:
6047
raise ValueError("No library directory set.")
@@ -65,13 +52,19 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
6552
self.files_not_in_library = []
6653
dir_file_count = 0
6754

68-
for f in lib_path.glob("**/*"):
55+
ignore_patterns = Ignore.get_patterns(library_dir)
56+
logger.info(ignore_patterns)
57+
for f in pathlib.Path(str(library_dir)).glob(
58+
"***/*", flags=PATH_GLOB_FLAGS, exclude=ignore_patterns
59+
):
6960
end_time_loop = time()
7061
# Yield output every 1/30 of a second
7162
if (end_time_loop - start_time_loop) > 0.034:
7263
yield dir_file_count
7364
start_time_loop = time()
7465

66+
logger.info(f)
67+
7568
# Skip if the file/path is already mapped in the Library
7669
if f in self.library.included_files:
7770
dir_file_count += 1
@@ -81,21 +74,10 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
8174
if f.is_dir():
8275
continue
8376

84-
# Ensure new file isn't in a globally ignored folder
85-
skip: bool = False
86-
for part in f.parts:
87-
# NOTE: Files starting with "._" are sometimes generated by macOS Finder.
88-
# More info: https://lists.apple.com/archives/applescript-users/2006/Jun/msg00180.html
89-
if part.startswith("._") or part in GLOBAL_IGNORE_SET:
90-
skip = True
91-
break
92-
if skip:
93-
continue
94-
9577
dir_file_count += 1
9678
self.library.included_files.add(f)
9779

98-
relative_path = f.relative_to(lib_path)
80+
relative_path = f.relative_to(library_dir)
9981
# TODO - load these in batch somehow
10082
if not self.library.has_path_entry(relative_path):
10183
self.files_not_in_library.append(relative_path)
@@ -104,8 +86,8 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
10486
yield dir_file_count
10587
logger.info(
10688
"Directory scan time",
107-
path=lib_path,
89+
path=library_dir,
10890
duration=(end_time_total - start_time_total),
109-
files_not_in_lib=self.files_not_in_library,
11091
files_scanned=dir_file_count,
92+
ignore_patterns=ignore_patterns,
11193
)

0 commit comments

Comments
 (0)