Skip to content

Commit 5cb238c

Browse files
[ENH] Turn on spann by default (chroma-core#4351)
1 parent 4e75aec commit 5cb238c

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

chromadb/test/property/invariants.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -246,16 +246,20 @@ def fd_not_exceeding_threadpool_size(threadpool_size: int) -> None:
246246
)
247247

248248
def get_space(collection: Collection):
249+
# TODO: this is a hack to get the space
250+
# We should update the tests to not pass space via metadata instead use collection
251+
# configuration_json
252+
space = None
249253
if "hnsw:space" in collection.metadata:
250-
return collection.metadata["hnsw:space"]
254+
space = collection.metadata["hnsw:space"]
251255
if collection._model.configuration_json is None:
252-
return None
256+
return space
253257
if 'spann' in collection._model.configuration_json and collection._model.configuration_json.get('spann') is not None and 'space' in collection._model.configuration_json.get('spann'):
254-
return collection._model.configuration_json.get('spann').get('space')
258+
space = collection._model.configuration_json.get('spann').get('space')
255259
elif 'hnsw' in collection._model.configuration_json and collection._model.configuration_json.get('hnsw') is not None and 'space' in collection._model.configuration_json.get('hnsw'):
256-
return collection._model.configuration_json.get('hnsw').get('space')
257-
else:
258-
return None
260+
if space is None:
261+
space = collection._model.configuration_json.get('hnsw').get('space')
262+
return space
259263

260264
def ann_accuracy(
261265
collection: Collection,

rust/index/src/spann/types.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2022,6 +2022,11 @@ impl<'me> SpannIndexReader<'me> {
20222022
dimensionality: usize,
20232023
ef_search: usize,
20242024
) -> Result<HnswIndexRef, SpannIndexReaderError> {
2025+
// We take a lock here to synchronize concurrent open of the same index.
2026+
// Otherwise, we could end up with a corrupted index since the filesystem
2027+
// operations are not guaranteed to be atomic.
2028+
// The lock is a partitioned mutex to allow for higher concurrency across collections.
2029+
let _guard = hnsw_provider.write_mutex.lock(id).await;
20252030
match hnsw_provider.get(id, cache_key).await {
20262031
Some(index) => Ok(index),
20272032
None => {

0 commit comments

Comments
 (0)