Skip to content

Commit e2ee377

Browse files
codethewebgithub-actions[bot]
authored andcommitted
[ENH]: don't skip GC for collection in fork tree if using GC v2 (#4681)
## Description of changes Does what the title says. Missed this in the original PR. ## Test plan _How are these changes tested?_ Updated the basic GC v2 E2E test to include a forked collection. ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_ n/a
1 parent 5e58b94 commit e2ee377

File tree

2 files changed

+53
-35
lines changed

2 files changed

+53
-35
lines changed

rust/garbage_collector/src/garbage_collector_component.rs

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -140,21 +140,20 @@ impl GarbageCollector {
140140
.as_ref()
141141
.ok_or(GarbageCollectCollectionError::Uninitialized)?;
142142

143-
let use_v1_orchestrator = match cleanup_mode {
144-
CleanupMode::DryRun | CleanupMode::Rename | CleanupMode::Delete => true,
145-
CleanupMode::DryRunV2 | CleanupMode::DeleteV2 => false,
146-
};
147-
148-
if use_v1_orchestrator {
143+
if cleanup_mode.is_v2() {
149144
let orchestrator =
150-
crate::garbage_collector_orchestrator::GarbageCollectorOrchestrator::new(
145+
crate::garbage_collector_orchestrator_v2::GarbageCollectorOrchestrator::new(
151146
collection.id,
152147
collection.version_file_path,
148+
collection.lineage_file_path,
153149
absolute_cutoff_time,
154150
self.sysdb_client.clone(),
155151
dispatcher.clone(),
152+
system.clone(),
156153
self.storage.clone(),
154+
self.root_manager.clone(),
157155
cleanup_mode,
156+
2,
158157
);
159158

160159
let started_at = SystemTime::now();
@@ -178,23 +177,19 @@ impl GarbageCollector {
178177
format!("{:?}", cleanup_mode),
179178
)],
180179
);
180+
181181
return Ok(result);
182182
}
183183

184-
let orchestrator =
185-
crate::garbage_collector_orchestrator_v2::GarbageCollectorOrchestrator::new(
186-
collection.id,
187-
collection.version_file_path,
188-
collection.lineage_file_path,
189-
absolute_cutoff_time,
190-
self.sysdb_client.clone(),
191-
dispatcher.clone(),
192-
system.clone(),
193-
self.storage.clone(),
194-
self.root_manager.clone(),
195-
cleanup_mode,
196-
2,
197-
);
184+
let orchestrator = crate::garbage_collector_orchestrator::GarbageCollectorOrchestrator::new(
185+
collection.id,
186+
collection.version_file_path,
187+
absolute_cutoff_time,
188+
self.sysdb_client.clone(),
189+
dispatcher.clone(),
190+
self.storage.clone(),
191+
cleanup_mode,
192+
);
198193

199194
let started_at = SystemTime::now();
200195
let result = orchestrator.run(system.clone()).await?;
@@ -217,7 +212,6 @@ impl GarbageCollector {
217212
format!("{:?}", cleanup_mode),
218213
)],
219214
);
220-
221215
Ok(result)
222216
}
223217
}
@@ -360,9 +354,18 @@ impl Handler<GarbageCollectMessage> for GarbageCollector {
360354
continue;
361355
}
362356

363-
if collection.lineage_file_path.is_some() {
357+
let cleanup_mode = if let Some(tenant_mode_overrides) = &self.tenant_mode_overrides {
358+
tenant_mode_overrides
359+
.get(&collection.tenant)
360+
.cloned()
361+
.unwrap_or(self.default_cleanup_mode)
362+
} else {
363+
self.default_cleanup_mode
364+
};
365+
366+
if collection.lineage_file_path.is_some() && !cleanup_mode.is_v2() {
364367
tracing::debug!(
365-
"Skipping garbage collection for root of fork tree: {}",
368+
"Skipping garbage collection for root of fork tree because GC v1 cannot handle fork trees: {}",
366369
collection.id
367370
);
368371
num_skipped_jobs += 1;
@@ -376,17 +379,6 @@ impl Handler<GarbageCollectMessage> for GarbageCollector {
376379
collection.version_file_path
377380
);
378381

379-
let cleanup_mode = if let Some(tenant_mode_overrides) = &self.tenant_mode_overrides {
380-
tenant_mode_overrides
381-
.get(&collection.tenant)
382-
.cloned()
383-
.unwrap_or(self.default_cleanup_mode)
384-
} else {
385-
self.default_cleanup_mode
386-
};
387-
388-
tracing::info!("Creating gc orchestrator for collection: {}", collection.id);
389-
390382
let instrumented_span = span!(parent: None, tracing::Level::INFO, "Garbage collection job", collection_id = ?collection.id, tenant_id = %collection.tenant, cleanup_mode = ?cleanup_mode);
391383
instrumented_span.follows_from(Span::current());
392384

@@ -1011,6 +1003,26 @@ mod tests {
10111003
let (collection_in_delete_mode, database_name_in_delete_mode) =
10121004
collection_in_delete_mode_handle.await.unwrap();
10131005

1006+
// Fork collection in delete mode to give it a lineage file (only GC v2 can handle fork trees)
1007+
{
1008+
let source_collection = sysdb
1009+
.get_collections(Some(collection_in_delete_mode), None, None, None, None, 0)
1010+
.await
1011+
.unwrap();
1012+
let source_collection = source_collection.first().unwrap();
1013+
1014+
sysdb
1015+
.fork_collection(
1016+
collection_in_delete_mode,
1017+
source_collection.total_records_post_compaction,
1018+
source_collection.total_records_post_compaction,
1019+
CollectionUuid::new(),
1020+
"test-fork".to_string(),
1021+
)
1022+
.await
1023+
.unwrap();
1024+
}
1025+
10141026
// Wait 1 second for cutoff time
10151027
tokio::time::sleep(Duration::from_secs(1)).await;
10161028

rust/garbage_collector/src/types.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ pub enum CleanupMode {
1919
DeleteV2,
2020
}
2121

22+
impl CleanupMode {
23+
pub fn is_v2(&self) -> bool {
24+
matches!(self, CleanupMode::DryRunV2 | CleanupMode::DeleteV2)
25+
}
26+
}
27+
2228
#[derive(Debug, Clone, Copy)]
2329
pub enum VersionStatus {
2430
#[allow(dead_code)]

0 commit comments

Comments
 (0)