Skip to content

Commit ed00d04

Browse files
committed
feat: clean tracker
Signed-off-by: discord9 <[email protected]>
1 parent 3a58ad5 commit ed00d04

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

src/meta-srv/src/gc_scheduler.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ pub struct GcSchedulerOptions {
7676
/// Set to a larger value (e.g., 24 hours) to balance performance and cleanup.
7777
/// Every Nth GC cycle will use full file listing, where N = full_file_listing_interval / TICKER_INTERVAL.
7878
pub full_file_listing_interval: Duration,
79+
/// Interval for cleaning up stale region entries from the GC tracker.
80+
/// This removes entries for regions that no longer exist (e.g., after table drops).
81+
/// Set to a larger value (e.g., 6 hours) since this is just for memory cleanup.
82+
pub tracker_cleanup_interval: Duration,
7983
}
8084

8185
impl Default for GcSchedulerOptions {
@@ -93,6 +97,8 @@ impl Default for GcSchedulerOptions {
9397
mailbox_timeout: Duration::from_secs(60), // 60 seconds
9498
// Perform full file listing every 24 hours to find orphan files
9599
full_file_listing_interval: Duration::from_secs(60 * 60 * 24),
100+
// Clean up stale tracker entries every 6 hours
101+
tracker_cleanup_interval: Duration::from_secs(60 * 60 * 6),
96102
}
97103
}
98104
}
@@ -163,6 +169,13 @@ impl GcSchedulerOptions {
163169
.fail();
164170
}
165171

172+
if self.tracker_cleanup_interval.is_zero() {
173+
return error::InvalidArgumentsSnafu {
174+
err_msg: "tracker_cleanup_interval must be greater than 0",
175+
}
176+
.fail();
177+
}
178+
166179
Ok(())
167180
}
168181
}
@@ -245,6 +258,8 @@ pub struct GcScheduler {
245258
config: GcSchedulerOptions,
246259
/// Tracks the last GC time for regions.
247260
region_gc_tracker: Arc<tokio::sync::Mutex<RegionGcTracker>>,
261+
/// Last time the tracker was cleaned up.
262+
last_tracker_cleanup: Arc<tokio::sync::Mutex<Instant>>,
248263
}
249264

250265
impl GcScheduler {
@@ -286,6 +301,7 @@ impl GcScheduler {
286301
receiver: rx,
287302
config,
288303
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
304+
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
289305
};
290306
Ok((gc_trigger, gc_ticker))
291307
}
@@ -317,6 +333,12 @@ impl GcScheduler {
317333
if let Err(e) = self.trigger_gc().await {
318334
error!(e; "Failed to trigger gc");
319335
}
336+
337+
// Periodically clean up stale tracker entries
338+
if let Err(e) = self.cleanup_tracker_if_needed().await {
339+
error!(e; "Failed to cleanup tracker");
340+
}
341+
320342
info!("Finished gc trigger");
321343
}
322344

@@ -980,4 +1002,46 @@ impl GcScheduler {
9801002
}
9811003
}
9821004
}
1005+
1006+
/// Clean up stale entries from the region GC tracker if enough time has passed.
///
/// Runs at most once per `tracker_cleanup_interval`: when less than that interval has
/// elapsed since the recorded last cleanup, this returns `Ok(())` without touching the tracker.
///
/// The `last_tracker_cleanup` guard is taken first and kept until the function returns,
/// i.e. across both `.await` points below.
///
/// # Errors
///
/// Propagates any error from `get_table_to_region_stats`. In that case the last-cleanup
/// timestamp is not updated, so the next call will attempt the cleanup again.
1007+
/// This removes entries for regions that are absent from the region stats returned by
/// `get_table_to_region_stats`.
/// NOTE(review): whether those stats cover every region that still exists is not visible
/// here; confirm, since any region missing from them is dropped from the tracker.
1008+
async fn cleanup_tracker_if_needed(&self) -> Result<()> {
1009+
let mut last_cleanup = self.last_tracker_cleanup.lock().await;
1010+
let now = Instant::now();
1011+
1012+
// Check if enough time has passed since last cleanup; if not, skip this round.
1013+
if now.duration_since(*last_cleanup) < self.config.tracker_cleanup_interval {
1014+
return Ok(());
1015+
}
1016+
1017+
info!("Starting region GC tracker cleanup");
1018+
// Separate timer used only for the duration reported in the completion log.
let cleanup_start = Instant::now();
1019+
1020+
// Collect the IDs of all regions present in the stats returned by
// `get_table_to_region_stats`, flattened across all tables.
1021+
let table_to_region_stats = self.get_table_to_region_stats().await?;
1022+
let mut current_regions = HashSet::new();
1023+
for region_stats in table_to_region_stats.values() {
1024+
for region_stat in region_stats {
1025+
current_regions.insert(region_stat.id);
1026+
}
1027+
}
1028+
1029+
// Remove stale entries from tracker: keep only regions collected above.
// `retain` never adds entries, so the subtraction below cannot underflow.
1030+
let mut tracker = self.region_gc_tracker.lock().await;
1031+
let initial_count = tracker.len();
1032+
tracker.retain(|region_id, _| current_regions.contains(region_id));
1033+
let removed_count = initial_count - tracker.len();
1034+
// Store the instant captured before the work began, so the next interval is
// measured from the start of this cleanup rather than from its end.
1035+
*last_cleanup = now;
1036+
1037+
info!(
1038+
"Completed region GC tracker cleanup: removed {} stale entries out of {} total (retained {}). Duration: {:?}",
1039+
removed_count,
1040+
initial_count,
1041+
tracker.len(),
1042+
cleanup_start.elapsed()
1043+
);
1044+
1045+
Ok(())
1046+
}
9831047
}

0 commit comments

Comments
 (0)