Skip to content

Commit ad1ac3d

Browse files
authored
[ENH] Fix 422 Unprocessable Entity error in Chroma Load (#4537)
## Description of changes

If the bit-difference workload is used for pure-upsert workloads, it will violate quota. This PR shrinks the batch size to just one cluster at a time. By construction, clusters fit in a batch.

## Test plan

- [X] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Documentation Changes

N/A
1 parent 3028a39 commit ad1ac3d

File tree

1 file changed

+10
-12
lines changed

1 file changed

+10
-12
lines changed

rust/load/src/bit_difference.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -266,19 +266,17 @@ impl SyntheticDataSet {
266266
let mut embeddings = vec![];
267267
let mut docs = vec![];
268268
let mut seen: HashSet<String> = HashSet::default();
269-
for _ in 0..uq.batch_size {
270-
let cluster = self.cluster_by_skew(skew, guac);
271-
let num_this_cluster = (cluster.docs.len() as f64 * uq.associativity).ceil() as usize;
272-
for _ in 0..num_this_cluster {
273-
let doc_idx = (any::<u32>(guac) as u64 * cluster.docs.len() as u64) >> 32;
274-
if seen.contains(&cluster.docs[doc_idx as usize].id()) {
275-
continue;
276-
}
277-
seen.insert(cluster.docs[doc_idx as usize].id());
278-
ids.push(cluster.docs[doc_idx as usize].id());
279-
embeddings.push(cluster.docs[doc_idx as usize].embedding());
280-
docs.push(cluster.docs[doc_idx as usize].content.clone());
269+
let cluster = self.cluster_by_skew(skew, guac);
270+
let num_this_cluster = (cluster.docs.len() as f64 * uq.associativity).ceil() as usize;
271+
for _ in 0..num_this_cluster {
272+
let doc_idx = (any::<u32>(guac) as u64 * cluster.docs.len() as u64) >> 32;
273+
if seen.contains(&cluster.docs[doc_idx as usize].id()) {
274+
continue;
281275
}
276+
seen.insert(cluster.docs[doc_idx as usize].id());
277+
ids.push(cluster.docs[doc_idx as usize].id());
278+
embeddings.push(cluster.docs[doc_idx as usize].embedding());
279+
docs.push(cluster.docs[doc_idx as usize].content.clone());
282280
}
283281
let ids = ids.iter().map(String::as_str).collect();
284282
let docs = docs.iter().map(String::as_str).collect();

0 commit comments

Comments
 (0)