-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcaa_verify.py
More file actions
executable file
·139 lines (110 loc) · 4.69 KB
/
caa_verify.py
File metadata and controls
executable file
·139 lines (110 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
#
# This module verifies the local cover art cache against the database records.
# It first resets the status of all records to 'NOT_DOWNLOADED', then scans the
# cache and sets the status of every record that has a corresponding file to
# 'DOWNLOADED'. Updates are written in batches to improve performance and
# memory usage.
import logging
import os
import time
import click
from dotenv import load_dotenv
from helpers import parse_local_filename
from store import CAABackupDataStore
# Minimum number of seconds that must elapse between two progress log lines
# emitted while the verifier is scanning.
VERIFY_PROGRESS_INTERVAL: int = 10
# -----------------------------------------------------------------------------
# The main class for the verifier project.
# -----------------------------------------------------------------------------
class CAAVerifier:
    """
    Verify the local cover art cache against the database.

    The verifier resets every record to NOT_DOWNLOADED, walks the images
    directory, and reports each recognised file's ``caa_id`` back to the
    datastore in batches so the records reflect what is actually on disk.
    """

    # The clock is only consulted once per this many scanned files, so the
    # progress check stays cheap inside the per-file loop.
    _PROGRESS_CHECK_EVERY = 10000

    def __init__(self, db_path: str, images_dir: str):
        """
        Initializes the verifier with paths to the datastore and images directory.

        Args:
            db_path (str): The path to the local SQLite database file.
            images_dir (str): The root directory where images are stored.
        """
        self.datastore = CAABackupDataStore(db_path=db_path)
        self.images_dir = images_dir

    @staticmethod
    def _log_walk_error(error: OSError) -> None:
        """Log a directory that ``os.walk`` could not read instead of hiding it."""
        # Files below an unreadable directory are never seen, so their records
        # stay NOT_DOWNLOADED; the warning makes that visible to the operator.
        logging.warning("Could not read directory %s: %s", error.filename, error)

    def _scan_and_update(self, batch_size: int = 1000) -> int:
        """
        Scan the images directory and update the database in streaming batches.

        Only one batch of ids is held in memory at a time; the full list of
        caa_ids is never built.

        Args:
            batch_size (int): Number of ids collected before they are handed
                to ``bulk_update_downloaded_status``.

        Returns:
            int: The number of ids submitted to the datastore.
        """
        logging.info("Scanning local images directory for files...")

        batch = []
        processed = 0  # every file visited, recognised or not
        updated = 0    # ids handed to the datastore so far
        # A monotonic clock keeps the throttle correct if the system time jumps.
        last_log = time.monotonic()

        walker = os.walk(self.images_dir, onerror=self._log_walk_error)
        for _root, _dirs, files in walker:
            for file in files:
                processed += 1

                parsed = parse_local_filename(file)
                if parsed:
                    batch.append(parsed["caa_id"])
                    # The batch only grows here, so this is the only place it
                    # can reach the flush threshold.
                    if len(batch) >= batch_size:
                        self.datastore.bulk_update_downloaded_status(batch)
                        updated += len(batch)
                        batch = []

                if processed % self._PROGRESS_CHECK_EVERY == 0:
                    now = time.monotonic()
                    if now - last_log >= VERIFY_PROGRESS_INTERVAL:
                        logging.info(
                            "Scanned %s files, updated %s records...",
                            processed,
                            updated,
                        )
                        last_log = now

        # Flush remaining batch
        if batch:
            self.datastore.bulk_update_downloaded_status(batch)
            updated += len(batch)

        logging.info(
            "Finished scanning. Files processed: %s, records updated: %s",
            processed,
            updated,
        )
        return updated

    def run_verifier(self):
        """
        Execute the verification process.

        If the images directory is missing, an error is logged and the method
        returns without modifying the database. Otherwise all records are
        reset, the directory is scanned, and a status summary is logged.
        """
        logging.info("Starting cache verification process...")

        # Guard before the destructive reset: walking a missing directory
        # yields nothing, which would leave every record NOT_DOWNLOADED.
        if not os.path.isdir(self.images_dir):
            logging.error(
                "Images directory '%s' does not exist or is not a directory; "
                "leaving the database untouched.",
                self.images_dir,
            )
            return

        with self.datastore:
            # Step 1: Mark all records in the database as NOT_DOWNLOADED.
            logging.info("Resetting all records to 'NOT_DOWNLOADED' status...")
            self.datastore.mark_all_as_undownloaded()

            # Step 2: Scan files and update DB in streaming batches.
            self._scan_and_update()

        # The summary enters the datastore context itself, so it runs after
        # the block above has exited rather than nested inside it.
        self._print_summary()
        logging.info("Verification complete.")

    def _print_summary(self):
        """
        Fetch the per-status record counts from the datastore and log them.

        Status names are logged with underscores replaced by spaces and in
        title case.
        """
        with self.datastore:
            logging.info("--- Verification Summary ---")
            status_counts = self.datastore.get_status_counts()
            for status, count in status_counts.items():
                logging.info("- %s: %s", status.replace("_", " ").title(), count)
            logging.info("----------------------------")
# -----------------------------------------------------------------------------
# Main entry point for the script
# -----------------------------------------------------------------------------
@click.command()
def main():
    """
    Script to verify the local cover art backup.
    Configuration is read from a .env file.
    """
    # Load environment variables from a .env file
    load_dotenv()

    db_path = os.getenv("DB_PATH")
    # The images directory may be supplied under any of three names; the first
    # one that is set and non-empty wins.
    images_dir = os.getenv("IMAGES_DIR") or os.getenv("CACHE_DIR") or os.getenv("BACKUP_DIR")

    # ClickException prints "Error: <message>" to stderr and exits with a
    # non-zero status, so wrappers and cron jobs can detect the failure.
    if not db_path:
        raise click.ClickException("DB_PATH environment variable is not set.")

    if not images_dir:
        raise click.ClickException(
            "None of the IMAGES_DIR, CACHE_DIR or BACKUP_DIR environment variables is set."
        )

    # Refuse to run against a missing directory: the verifier resets every
    # record before scanning, so an empty walk would mark all as undownloaded.
    if not os.path.isdir(images_dir):
        raise click.ClickException(
            f"Images directory '{images_dir}' does not exist or is not a directory."
        )

    verifier = CAAVerifier(db_path=db_path, images_dir=images_dir)
    verifier.run_verifier()
# Invoke the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()