Skip to content

Commit 6a30842

Browse files
authored
[CHORE] Move distributed to default collection configuration (#4377)
## Description of changes This PR removes the ability to set collection configuration on distributed chroma. If the user sets any configuration other than space, it will error in distributed chroma _Summarize the changes made by this PR._ - Improvements & Bug fixes - ... - New functionality - ... ## Test plan _How are these changes tested?_ - [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 7d01a2b commit 6a30842

File tree

11 files changed

+259
-134
lines changed

11 files changed

+259
-134
lines changed

clients/js/packages/chromadb-core/src/generated/models.ts

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -203,35 +203,35 @@ export namespace Api {
203203

204204
export interface HnswConfiguration {
205205
/**
206-
* @type {number}
206+
* @type {number | null}
207207
* @memberof HnswConfiguration
208208
* minimum: 0
209209
*/
210-
ef_construction?: number;
210+
ef_construction?: number | null;
211211
/**
212-
* @type {number}
212+
* @type {number | null}
213213
* @memberof HnswConfiguration
214214
* minimum: 0
215215
*/
216-
ef_search?: number;
216+
ef_search?: number | null;
217217
/**
218-
* @type {number}
218+
* @type {number | null}
219219
* @memberof HnswConfiguration
220220
* minimum: 0
221221
*/
222-
max_neighbors?: number;
222+
max_neighbors?: number | null;
223223
/**
224-
* @type {number}
224+
* @type {number | null}
225225
* @memberof HnswConfiguration
226226
*/
227-
resize_factor?: number;
227+
resize_factor?: number | null;
228228
space?: Api.HnswSpace;
229229
/**
230-
* @type {number}
230+
* @type {number | null}
231231
* @memberof HnswConfiguration
232232
* minimum: 0
233233
*/
234-
sync_threshold?: number;
234+
sync_threshold?: number | null;
235235
}
236236

237237
export enum HnswSpace {
@@ -279,54 +279,54 @@ export namespace Api {
279279

280280
export interface SpannConfiguration {
281281
/**
282-
* @type {number}
282+
* @type {number | null}
283283
* @memberof SpannConfiguration
284284
* minimum: 0
285285
*/
286-
ef_construction?: number;
286+
ef_construction?: number | null;
287287
/**
288-
* @type {number}
288+
* @type {number | null}
289289
* @memberof SpannConfiguration
290290
* minimum: 0
291291
*/
292-
ef_search?: number;
292+
ef_search?: number | null;
293293
/**
294-
* @type {number}
294+
* @type {number | null}
295295
* @memberof SpannConfiguration
296296
* minimum: 0
297297
*/
298-
max_neighbors?: number;
298+
max_neighbors?: number | null;
299299
/**
300-
* @type {number}
300+
* @type {number | null}
301301
* @memberof SpannConfiguration
302302
* minimum: 0
303303
*/
304-
merge_threshold?: number;
304+
merge_threshold?: number | null;
305305
/**
306-
* @type {number}
306+
* @type {number | null}
307307
* @memberof SpannConfiguration
308308
* minimum: 0
309309
*/
310-
reassign_neighbor_count?: number;
310+
reassign_neighbor_count?: number | null;
311311
/**
312-
* @type {number}
312+
* @type {number | null}
313313
* @memberof SpannConfiguration
314314
* minimum: 0
315315
*/
316-
search_nprobe?: number;
316+
search_nprobe?: number | null;
317317
space?: Api.HnswSpace;
318318
/**
319-
* @type {number}
319+
* @type {number | null}
320320
* @memberof SpannConfiguration
321321
* minimum: 0
322322
*/
323-
split_threshold?: number;
323+
split_threshold?: number | null;
324324
/**
325-
* @type {number}
325+
* @type {number | null}
326326
* @memberof SpannConfiguration
327327
* minimum: 0
328328
*/
329-
write_nprobe?: number;
329+
write_nprobe?: number | null;
330330
}
331331

332332
export interface UpdateCollectionConfiguration {

rust/frontend/sample_configs/distributed.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,4 @@ scorecard:
4747
score: 100
4848
enable_span_indexing: true
4949
default_knn_index: "spann"
50+
enable_set_index_params: false

rust/frontend/sample_configs/tilt_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,4 @@ circuit_breaker:
5555
requests: 1000
5656
enable_span_indexing: true
5757
default_knn_index: "spann"
58+
enable_set_index_params: true

rust/frontend/src/config.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ fn default_enable_span_indexing() -> bool {
119119
false
120120
}
121121

122+
fn default_enable_set_index_params() -> bool {
123+
true
124+
}
125+
122126
#[derive(Deserialize, Serialize, Clone, Debug)]
123127
pub struct FrontendServerConfig {
124128
#[serde(flatten)]
@@ -144,6 +148,8 @@ pub struct FrontendServerConfig {
144148
pub cors_allow_origins: Option<Vec<String>>,
145149
#[serde(default = "default_enable_span_indexing")]
146150
pub enable_span_indexing: bool,
151+
#[serde(default = "default_enable_set_index_params")]
152+
pub enable_set_index_params: bool,
147153
}
148154

149155
const DEFAULT_CONFIG_PATH: &str = "sample_configs/distributed.yaml";

rust/frontend/src/server.rs

Lines changed: 74 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ use chroma_types::{
1313
CreateDatabaseResponse, CreateTenantRequest, CreateTenantResponse,
1414
DeleteCollectionRecordsResponse, DeleteDatabaseRequest, DeleteDatabaseResponse,
1515
GetCollectionRequest, GetDatabaseRequest, GetDatabaseResponse, GetRequest, GetResponse,
16-
GetTenantRequest, GetTenantResponse, GetUserIdentityResponse, HeartbeatResponse, IncludeList,
17-
InternalCollectionConfiguration, ListCollectionsRequest, ListCollectionsResponse,
18-
ListDatabasesRequest, ListDatabasesResponse, Metadata, QueryRequest, QueryResponse,
19-
UpdateCollectionConfiguration, UpdateCollectionRecordsResponse, UpdateCollectionResponse,
20-
UpdateMetadata, UpsertCollectionRecordsResponse,
16+
GetTenantRequest, GetTenantResponse, GetUserIdentityResponse, HeartbeatResponse,
17+
HnswConfiguration, IncludeList, InternalCollectionConfiguration, ListCollectionsRequest,
18+
ListCollectionsResponse, ListDatabasesRequest, ListDatabasesResponse, Metadata, QueryRequest,
19+
QueryResponse, SpannConfiguration, UpdateCollectionConfiguration,
20+
UpdateCollectionRecordsResponse, UpdateCollectionResponse, UpdateMetadata,
21+
UpsertCollectionRecordsResponse,
2122
};
2223
use chroma_types::{ForkCollectionResponse, RawWhereFields};
2324
use mdac::{Rule, Scorecard, ScorecardTicket};
@@ -911,19 +912,75 @@ async fn create_collection(
911912
format!("tenant:{}", tenant).as_str(),
912913
])?;
913914

914-
let configuration = match payload.configuration {
915-
Some(c) => Some(InternalCollectionConfiguration::try_from_config(
916-
c,
917-
server.config.frontend.default_knn_index,
918-
)?),
919-
None => Some(InternalCollectionConfiguration::try_from_config(
920-
CollectionConfiguration {
921-
hnsw: None,
922-
spann: None,
923-
embedding_function: None,
924-
},
915+
let payload_clone = payload.clone();
916+
917+
let configuration = if !server.config.enable_set_index_params {
918+
let mut new_configuration = payload.configuration.clone().unwrap();
919+
let hnsw_config = new_configuration.hnsw.clone();
920+
let spann_config = new_configuration.spann.clone();
921+
922+
if let Some(hnsw) = &hnsw_config {
923+
let space = hnsw.space.clone();
924+
if hnsw.ef_construction.is_some()
925+
|| hnsw.ef_search.is_some()
926+
|| hnsw.max_neighbors.is_some()
927+
|| hnsw.num_threads.is_some()
928+
|| hnsw.resize_factor.is_some()
929+
|| hnsw.sync_threshold.is_some()
930+
|| hnsw.batch_size.is_some()
931+
{
932+
return Err(ValidationError::SpaceConfigurationForVectorIndexType(
933+
"HNSW".to_string(),
934+
)
935+
.into());
936+
}
937+
let new_hnsw = HnswConfiguration {
938+
space,
939+
..Default::default()
940+
};
941+
new_configuration.hnsw = Some(new_hnsw);
942+
} else if let Some(spann) = &spann_config {
943+
let space = spann.space.clone();
944+
if spann.search_nprobe.is_some()
945+
|| spann.write_nprobe.is_some()
946+
|| spann.ef_construction.is_some()
947+
|| spann.ef_search.is_some()
948+
|| spann.max_neighbors.is_some()
949+
|| spann.reassign_neighbor_count.is_some()
950+
|| spann.split_threshold.is_some()
951+
|| spann.merge_threshold.is_some()
952+
{
953+
return Err(ValidationError::SpaceConfigurationForVectorIndexType(
954+
"SPANN".to_string(),
955+
)
956+
.into());
957+
}
958+
let new_spann = SpannConfiguration {
959+
space,
960+
..Default::default()
961+
};
962+
new_configuration.spann = Some(new_spann);
963+
}
964+
965+
Some(InternalCollectionConfiguration::try_from_config(
966+
new_configuration,
925967
server.config.frontend.default_knn_index,
926-
)?),
968+
)?)
969+
} else {
970+
match payload_clone.configuration {
971+
Some(c) => Some(InternalCollectionConfiguration::try_from_config(
972+
c,
973+
server.config.frontend.default_knn_index,
974+
)?),
975+
None => Some(InternalCollectionConfiguration::try_from_config(
976+
CollectionConfiguration {
977+
hnsw: None,
978+
spann: None,
979+
embedding_function: None,
980+
},
981+
server.config.frontend.default_knn_index,
982+
)?),
983+
}
927984
};
928985

929986
let request = CreateCollectionRequest::try_new(

rust/frontend/src/types/errors.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ pub enum ValidationError {
2626
UpdateCollection(#[from] UpdateCollectionError),
2727
#[error("Error parsing collection configuration: {0}")]
2828
ParseCollectionConfiguration(#[from] CollectionConfigurationToInternalConfigurationError),
29+
#[error("For optimal performance and accuracy, can't set parameters other than space for index type {0}")]
30+
SpaceConfigurationForVectorIndexType(String),
2931
}
3032

3133
impl ChromaError for ValidationError {
@@ -37,6 +39,7 @@ impl ChromaError for ValidationError {
3739
ValidationError::GetCollection(err) => err.code(),
3840
ValidationError::UpdateCollection(err) => err.code(),
3941
ValidationError::ParseCollectionConfiguration(_) => ErrorCodes::InvalidArgument,
42+
ValidationError::SpaceConfigurationForVectorIndexType(_) => ErrorCodes::InvalidArgument,
4043
}
4144
}
4245
}

rust/index/src/hnsw_provider.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ mod tests {
623623
use super::*;
624624
use chroma_cache::new_non_persistent_cache_for_test;
625625
use chroma_storage::local::LocalStorage;
626-
use chroma_types::HnswConfiguration;
626+
use chroma_types::InternalHnswConfiguration;
627627

628628
#[tokio::test]
629629
async fn test_fork() {
@@ -640,7 +640,7 @@ mod tests {
640640

641641
let dimensionality = 128;
642642
let distance_function = DistanceFunction::Euclidean;
643-
let default_hnsw_params = HnswConfiguration::default();
643+
let default_hnsw_params = InternalHnswConfiguration::default();
644644
let created_index = provider
645645
.create(
646646
&collection_id,
@@ -683,7 +683,7 @@ mod tests {
683683

684684
let dimensionality = 2;
685685
let distance_function = DistanceFunction::Euclidean;
686-
let default_hnsw_params = HnswConfiguration::default();
686+
let default_hnsw_params = InternalHnswConfiguration::default();
687687
let created_index = provider
688688
.create(
689689
&collection_id,

rust/segment/src/distributed_hnsw.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,8 @@ pub mod test {
379379

380380
use chroma_index::{HnswIndexConfig, DEFAULT_MAX_ELEMENTS};
381381
use chroma_types::{
382-
Collection, CollectionUuid, HnswConfiguration, InternalCollectionConfiguration, Segment,
383-
SegmentUuid,
382+
Collection, CollectionUuid, InternalCollectionConfiguration, InternalHnswConfiguration,
383+
Segment, SegmentUuid,
384384
};
385385
use tempfile::tempdir;
386386
use uuid::Uuid;
@@ -389,7 +389,7 @@ pub mod test {
389389
fn parameter_defaults() {
390390
let persist_path = tempdir().unwrap().path().to_owned();
391391

392-
let hnsw_configuration = HnswConfiguration::default();
392+
let hnsw_configuration = InternalHnswConfiguration::default();
393393
let config = HnswIndexConfig::new_persistent(
394394
hnsw_configuration.max_neighbors,
395395
hnsw_configuration.ef_construction,
@@ -398,7 +398,7 @@ pub mod test {
398398
)
399399
.expect("Error creating hnsw index config");
400400

401-
let default_hnsw_params = HnswConfiguration::default();
401+
let default_hnsw_params = InternalHnswConfiguration::default();
402402

403403
assert_eq!(config.max_elements, DEFAULT_MAX_ELEMENTS);
404404
assert_eq!(config.m, default_hnsw_params.max_neighbors);
@@ -413,10 +413,12 @@ pub mod test {
413413
// Try partial override
414414
let collection = Collection {
415415
config: InternalCollectionConfiguration {
416-
vector_index: chroma_types::VectorIndexConfiguration::Hnsw(HnswConfiguration {
417-
max_neighbors: 10,
418-
..Default::default()
419-
}),
416+
vector_index: chroma_types::VectorIndexConfiguration::Hnsw(
417+
InternalHnswConfiguration {
418+
max_neighbors: 10,
419+
..Default::default()
420+
},
421+
),
420422
embedding_function: None,
421423
},
422424
..Default::default()

0 commit comments

Comments
 (0)