TagStudioDev
diff --git a/‎src/tagstudio/core/constants.py
Lines changed: 1 addition & 0 deletions b/‎src/tagstudio/core/constants.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/tagstudio/core/library/alchemy/library.py
Lines changed: 31 additions & 1 deletion b/‎src/tagstudio/core/library/alchemy/library.py
Lines changed: 31 additions & 1 deletion
diff --git a/‎src/tagstudio/core/library/ignore.py
Lines changed: 148 additions & 0 deletions b/‎src/tagstudio/core/library/ignore.py
Lines changed: 148 additions & 0 deletions
diff --git a/‎src/tagstudio/core/utils/missing_files.py
Lines changed: 13 additions & 11 deletions b/‎src/tagstudio/core/utils/missing_files.py
Lines changed: 13 additions & 11 deletions
diff --git a/‎src/tagstudio/core/utils/refresh_dir.py
Lines changed: 14 additions & 32 deletions b/‎src/tagstudio/core/utils/refresh_dir.py
Lines changed: 14 additions & 32 deletions
@@ -9,6 +9,7 @@
 TS_FOLDER_NAME: str = ".TagStudio"
 BACKUP_FOLDER_NAME: str = "backups"
 COLLAGE_FOLDER_NAME: str = "collages"
+IGNORE_NAME: str = ".ts_ignore"
 THUMB_CACHE_NAME: str = "thumbs"
 
 FONT_SAMPLE_TEXT: str = (
 
@@ -83,6 +83,7 @@
     ValueType,
 )
 from tagstudio.core.library.alchemy.visitors import SQLBoolExpressionBuilder
+from tagstudio.core.library.ignore import Ignore
 from tagstudio.core.library.json.library import Library as JsonLibrary
 from tagstudio.qt.translations import Translations
 
@@ -92,6 +93,7 @@
 
 logger = structlog.get_logger(__name__)
 
+
 TAG_CHILDREN_QUERY = text("""
 -- Note for this entire query that tag_parents.child_id is the parent id and tag_parents.parent_id is the child id due to bad naming
 WITH RECURSIVE ChildTags AS (
@@ -866,6 +868,7 @@ def search_library(
         """
         assert isinstance(search, BrowsingState)
         assert self.engine
+        assert self.library_dir
 
         with Session(self.engine, expire_on_commit=False) as session:
             statement = select(Entry)
@@ -878,6 +881,7 @@ def search_library(
                     f"SQL Expression Builder finished ({format_timespan(end_time - start_time)})"
                 )
 
+            # TODO: Convert old extension lists to new .ts_ignore format
             extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)
             is_exclude_list = self.prefs(LibraryPrefs.IS_EXCLUDE_LIST)
 
@@ -887,11 +891,37 @@ def search_library(
                 statement = statement.where(Entry.suffix.in_(extensions))
 
             statement = statement.distinct(Entry.id)
+            ignore_patterns: list[str] = Ignore.get_patterns(self.library_dir)
+
+            # Add glob pattern filters with exclusion patterns allowing for overrides.
+            statement = statement.filter(
+                and_(
+                    or_(
+                        or_(
+                            *[
+                                Entry.path.op("GLOB")(p.lstrip("!"))
+                                for p in ignore_patterns
+                                if p.startswith("!")
+                            ]
+                        ),
+                        and_(
+                            *[
+                                Entry.path.op("NOT GLOB")(p)
+                                for p in ignore_patterns
+                                if not p.startswith("!")
+                            ]
+                        ),
+                    )
+                )
+            )
+
+            # TODO: This query will become unnecessary once this method returns unlimited IDs and
+            # it becomes the frontend's responsibility (once again) to split and display them.
             start_time = time.time()
             query_count = select(func.count()).select_from(statement.alias("entries"))
             count_all: int = session.execute(query_count).scalar() or 0
             end_time = time.time()
-            logger.info(f"finished counting ({format_timespan(end_time - start_time)})")
+            logger.info(f"[Library] Finished counting ({format_timespan(end_time - start_time)})")
 
             sort_on: ColumnExpressionArgument = Entry.id
             match search.sorting_mode:
 
@@ -0,0 +1,148 @@
+# Copyright (C) 2025 Travis Abendshien (CyanVoxel).
+# Licensed under the GPL-3.0 License.
+# Created for TagStudio: https://github.com/CyanVoxel/TagStudio
+
+from copy import deepcopy
+from pathlib import Path
+
+import structlog
+from wcmatch import glob, pathlib
+
+from tagstudio.core.constants import IGNORE_NAME, TS_FOLDER_NAME
+from tagstudio.core.singleton import Singleton
+
+logger = structlog.get_logger()
+
+PATH_GLOB_FLAGS = glob.GLOBSTARLONG | glob.DOTGLOB | glob.NEGATE | pathlib.MATCHBASE
+
+
+def _ignore_to_glob(ignore_patterns: list[str]) -> list[str]:
+    """Convert .gitignore-like patterns to explicit glob syntax.
+
+    The implicit matching rules of .gitignore are spelled out as extra patterns:
+    a pattern that does not start with "**/", "*/" or "/" is additionally emitted with a
+    "**/" prefix (with and without its trailing "/**", "/*" or "/"), and every pattern
+    that does not already end in "/**" is additionally emitted with that suffix.
+    A leading "!" is carried over to every variant generated from the pattern.
+
+    Args:
+        ignore_patterns (list[str]): The .gitignore-like patterns to convert.
+            The list itself is not modified.
+
+    Returns:
+        list[str]: The original and generated patterns, de-duplicated and kept in
+            first-seen order so the result is identical between runs.
+    """
+    expanded: list[str] = list(ignore_patterns)
+
+    # Mimic implicit .gitignore syntax behavior for the SQLite GLOB function.
+    for pattern in ignore_patterns:
+        # Temporarily remove any exclusion character before processing
+        exclusion_char = "!" if pattern.startswith("!") else ""
+        gp = pattern[len(exclusion_char) :]
+
+        if gp.startswith(("**/", "*/", "/")):
+            continue
+
+        # Create a version of a prefix-less pattern that starts with "**/"
+        prefixed = "**/" + gp
+        # ...the same pattern without a trailing directory wildcard or slash
+        trimmed = prefixed.removesuffix("/**").removesuffix("/*").removesuffix("/")
+        # ...and the trimmed pattern without its leading wildcard
+        bare = trimmed.removeprefix("**/").removeprefix("*/")
+        expanded.extend(exclusion_char + variant for variant in (prefixed, trimmed, bare))
+
+    # Add "/**" suffix to suffix-less patterns to match implicit .gitignore behavior.
+    # Built as a separate list so nothing is appended to a list while it is iterated.
+    suffixed = [
+        pattern.removesuffix("/*").removesuffix("/") + "/**"
+        for pattern in expanded
+        if not pattern.endswith("/**")
+    ]
+
+    # dict.fromkeys() drops duplicates while preserving insertion order, unlike set(),
+    # whose iteration order for strings changes from one interpreter run to the next.
+    glob_patterns = list(dict.fromkeys(expanded + suffixed))
+
+    logger.info("[Ignore]", glob_patterns=glob_patterns)
+    return glob_patterns
+
+
+# Patterns ignored in every library; expanded to explicit glob syntax once at import time.
+GLOBAL_IGNORE = _ignore_to_glob(
+    [
+        # TagStudio -------------------
+        TS_FOLDER_NAME,
+        # System Trashes --------------
+        ".Trash",
+        ".Trash-*",
+        ".Trashes",
+        "$RECYCLE.BIN",
+        # macOS Generated -------------
+        ".DS_Store",
+        ".fseventsd",
+        ".Spotlight-V100",
+        "._*",
+        # Windows Generated -----------
+        "System Volume Information",
+    ]
+)
+
+
+class Ignore(metaclass=Singleton):
+    """Class for processing and managing glob-like file ignore patterns.
+
+    The processed patterns are cached at class level. The cache is keyed on the
+    .ts_ignore path, its modification time and the ``include_global`` flag, so the
+    file is only re-read when one of those changes.
+    """
+
+    # (path, mtime, include_global) of the last processed .ts_ignore file, or None if the
+    # last call found no file.
+    _last_loaded: tuple[Path, float, bool] | None = None
+    # Patterns produced by the most recent call to get_patterns().
+    _patterns: list[str] = []
+
+    @staticmethod
+    def get_patterns(library_dir: Path, include_global: bool = True) -> list[str]:
+        """Get the ignore patterns for the given library directory.
+
+        Args:
+            library_dir (Path): The path of the library to load patterns from.
+            include_global (bool): Flag for including the global ignore set.
+                In most scenarios, this should be True.
+
+        Returns:
+            list[str]: A new list of glob patterns. It is a copy, so callers may modify
+                it without affecting the cache or GLOBAL_IGNORE.
+        """
+        # Copy so that neither the cache nor the caller ever aliases GLOBAL_IGNORE.
+        patterns: list[str] = list(GLOBAL_IGNORE) if include_global else []
+        ts_ignore_path = library_dir / TS_FOLDER_NAME / IGNORE_NAME
+
+        # stat() doubles as the existence check; a separate exists() call would leave a
+        # window in which the file could be removed before its mtime is read.
+        try:
+            mtime = ts_ignore_path.stat().st_mtime
+        except (FileNotFoundError, NotADirectoryError):
+            logger.info(
+                "[Ignore] No .ts_ignore file found",
+                path=ts_ignore_path,
+            )
+            Ignore._last_loaded = None
+            Ignore._patterns = patterns
+
+            return list(Ignore._patterns)
+
+        # Process the .ts_ignore file if the previous result is non-existent or outdated.
+        # include_global is part of the key so that a result cached for one flag value is
+        # never returned for the other.
+        loaded = (ts_ignore_path, mtime, include_global)
+        if Ignore._last_loaded != loaded:
+            logger.info(
+                "[Ignore] Processing the .ts_ignore file...",
+                library=library_dir,
+                last_mtime=Ignore._last_loaded[1] if Ignore._last_loaded else None,
+                new_mtime=loaded[1],
+            )
+            Ignore._patterns = _ignore_to_glob(patterns + Ignore._load_ignore_file(ts_ignore_path))
+        else:
+            logger.info(
+                "[Ignore] No updates to the .ts_ignore detected",
+                library=library_dir,
+                last_mtime=Ignore._last_loaded[1],
+                new_mtime=loaded[1],
+            )
+        Ignore._last_loaded = loaded
+
+        return list(Ignore._patterns)
+
+    @staticmethod
+    def _load_ignore_file(path: Path) -> list[str]:
+        """Load and process the .ts_ignore file into a list of glob patterns.
+
+        Args:
+            path (Path): The path of the .ts_ignore file.
+
+        Returns:
+            list[str]: The stripped, non-blank, non-comment lines of the file, in file
+                order. Empty if the file does not exist.
+        """
+        try:
+            with open(path, encoding="utf8") as f:
+                lines = [line_raw.strip() for line_raw in f]
+        except (FileNotFoundError, NotADirectoryError):
+            # The file may disappear between the caller's check and this read.
+            return []
+
+        # Ignore blank lines and comments
+        return [line for line in lines if line and not line.startswith("#")]
@@ -3,10 +3,11 @@
 from pathlib import Path
 
 import structlog
+from wcmatch import pathlib
 
 from tagstudio.core.library.alchemy.library import Library
 from tagstudio.core.library.alchemy.models import Entry
-from tagstudio.core.utils.refresh_dir import GLOBAL_IGNORE_SET
+from tagstudio.core.library.ignore import PATH_GLOB_FLAGS, Ignore
 
 logger = structlog.get_logger()
 
@@ -25,7 +26,9 @@ def missing_file_entries_count(self) -> int:
 
     def refresh_missing_files(self) -> Iterator[int]:
         """Track the number of entries that point to an invalid filepath."""
+        assert self.library.library_dir
         logger.info("[refresh_missing_files] Refreshing missing files...")
+
         self.missing_file_entries = []
         for i, entry in enumerate(self.library.get_entries()):
             full_path = self.library.library_dir / entry.path
@@ -38,16 +41,15 @@ def match_missing_file_entry(self, match_entry: Entry) -> list[Path]:
 
         Works if files were just moved to different subfolders and don't have duplicate names.
         """
-        matches = []
-        for path in self.library.library_dir.glob(f"**/{match_entry.path.name}"):
-            # Ensure matched file isn't in a globally ignored folder
-            skip: bool = False
-            for part in path.parts:
-                if part in GLOBAL_IGNORE_SET:
-                    skip = True
-                    break
-            if skip:
-                continue
+        assert self.library.library_dir
+        matches: list[Path] = []
+
+        ignore_patterns = Ignore.get_patterns(self.library.library_dir)
+        for path in pathlib.Path(str(self.library.library_dir)).glob(
+            f"***/{match_entry.path.name}",
+            flags=PATH_GLOB_FLAGS,
+            exclude=ignore_patterns,
+        ):
             if path.name == match_entry.path.name:
                 new_path = Path(path).relative_to(self.library.library_dir)
                 matches.append(new_path)
 
@@ -5,27 +5,14 @@
 from time import time
 
 import structlog
+from wcmatch import pathlib
 
-from tagstudio.core.constants import TS_FOLDER_NAME
 from tagstudio.core.library.alchemy.library import Library
 from tagstudio.core.library.alchemy.models import Entry
+from tagstudio.core.library.ignore import PATH_GLOB_FLAGS, Ignore
 
 logger = structlog.get_logger(__name__)
 
-GLOBAL_IGNORE_SET: set[str] = set(
-    [
-        TS_FOLDER_NAME,
-        "$RECYCLE.BIN",
-        ".Trashes",
-        ".Trash",
-        "tagstudio_thumbs",
-        ".fseventsd",
-        ".Spotlight-V100",
-        "System Volume Information",
-        ".DS_Store",
-    ]
-)
-
 
 @dataclass
 class RefreshDirTracker:
@@ -42,7 +29,7 @@ def save_new_files(self):
             entries = [
                 Entry(
                     path=entry_path,
-                    folder=self.library.folder,
+                    folder=self.library.folder,  # pyright: ignore[reportArgumentType]
                     fields=[],
                     date_added=dt.now(),
                 )
@@ -54,7 +41,7 @@ def save_new_files(self):
 
         yield
 
-    def refresh_dir(self, lib_path: Path) -> Iterator[int]:
+    def refresh_dir(self, library_dir: Path) -> Iterator[int]:
         """Scan a directory for files, and add those relative filenames to internal variables."""
         if self.library.library_dir is None:
             raise ValueError("No library directory set.")
@@ -65,13 +52,19 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
         self.files_not_in_library = []
         dir_file_count = 0
 
-        for f in lib_path.glob("**/*"):
+        ignore_patterns = Ignore.get_patterns(library_dir)
+        logger.info(ignore_patterns)
+        for f in pathlib.Path(str(library_dir)).glob(
+            "***/*", flags=PATH_GLOB_FLAGS, exclude=ignore_patterns
+        ):
             end_time_loop = time()
             # Yield output every 1/30 of a second
             if (end_time_loop - start_time_loop) > 0.034:
                 yield dir_file_count
                 start_time_loop = time()
 
+            logger.info(f)
+
             # Skip if the file/path is already mapped in the Library
             if f in self.library.included_files:
                 dir_file_count += 1
@@ -81,21 +74,10 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
             if f.is_dir():
                 continue
 
-            # Ensure new file isn't in a globally ignored folder
-            skip: bool = False
-            for part in f.parts:
-                # NOTE: Files starting with "._" are sometimes generated by macOS Finder.
-                # More info: https://lists.apple.com/archives/applescript-users/2006/Jun/msg00180.html
-                if part.startswith("._") or part in GLOBAL_IGNORE_SET:
-                    skip = True
-                    break
-            if skip:
-                continue
-
             dir_file_count += 1
             self.library.included_files.add(f)
 
-            relative_path = f.relative_to(lib_path)
+            relative_path = f.relative_to(library_dir)
             # TODO - load these in batch somehow
             if not self.library.has_path_entry(relative_path):
                 self.files_not_in_library.append(relative_path)
@@ -104,8 +86,8 @@ def refresh_dir(self, lib_path: Path) -> Iterator[int]:
         yield dir_file_count
         logger.info(
             "Directory scan time",
-            path=lib_path,
+            path=library_dir,
             duration=(end_time_total - start_time_total),
-            files_not_in_lib=self.files_not_in_library,
             files_scanned=dir_file_count,
+            ignore_patterns=ignore_patterns,
         )