Skip to content

Commit 31a461d

Browse files
authored
[ENH] Add hook to initialize collection before running verification (#4369)
## Description of changes

This adds a hook for workload executors to initialize the data sets before starting the verification process. The verifying data set implements the hook by resetting its test collection: it deletes the collection and then recreates it.

## Test plan

Tested locally, using the same process as in #4341, but without the manual step to create the test collection, since it is no longer necessary.
1 parent cc30583 commit 31a461d

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

rust/load/src/data_sets.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,37 @@ impl DataSet for VerifyingDataSet {
972972
self.reference_data_set.cardinality()
973973
}
974974

975+
// Reset the test collection to an empty state by deleting and recreating it.
976+
async fn initialize(
977+
&self,
978+
client: &ChromaClient,
979+
) -> Result<(), Box<dyn std::error::Error + Send>> {
980+
// Attempt to delete the collection. If it doesn't exist, ignore the error.
981+
match client.delete_collection(&self.test_data_set).await {
982+
Ok(_) => (),
983+
Err(err) => {
984+
if !format!("{err:?}").contains("404") {
985+
return Err(Box::new(Error::InvalidRequest(format!(
986+
"failed to delete collection: {err:?}"
987+
))));
988+
}
989+
}
990+
};
991+
992+
// Create the collection.
993+
match client
994+
.create_collection(&self.test_data_set, None, true)
995+
.await
996+
{
997+
Ok(_) => Ok(()),
998+
Err(err) => {
999+
return Err(Box::new(Error::InvalidRequest(format!(
1000+
"failed to create collection: {err:?}"
1001+
))));
1002+
}
1003+
}
1004+
}
1005+
9751006
async fn get(
9761007
&self,
9771008
client: &ChromaClient,

rust/load/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,11 @@ pub trait DataSet: std::fmt::Debug + Send + Sync {
237237
self.cardinality()
238238
}
239239

240+
// Hook to perform initialization of the data set, if necessary.
241+
async fn initialize(&self, _: &ChromaClient) -> Result<(), Box<dyn std::error::Error + Send>> {
242+
Ok(())
243+
}
244+
240245
/// Get documents by key. This is used when one workload references another. Return None to
241246
/// indicate the data set does not support referencing by index.
242247
async fn get_by_key(
@@ -1302,6 +1307,14 @@ impl LoadService {
13021307
}
13031308
}
13041309
});
1310+
1311+
// Initialize the data set.
1312+
let data_set = Arc::clone(&spec.data_set);
1313+
if let Err(err) = data_set.initialize(&client).await {
1314+
tracing::error!("failed to initialize data set: {err:?}");
1315+
return;
1316+
}
1317+
13051318
let seq_no = Arc::new(TokioMutex::new(0u64));
13061319
let start = Instant::now();
13071320
while !done.load(std::sync::atomic::Ordering::Relaxed) {

0 commit comments

Comments
 (0)