File tree Expand file tree Collapse file tree 2 files changed +15
-6
lines changed Expand file tree Collapse file tree 2 files changed +15
-6
lines changed Original file line number Diff line number Diff line change @@ -246,16 +246,20 @@ def fd_not_exceeding_threadpool_size(threadpool_size: int) -> None:
246
246
)
247
247
248
248
def get_space (collection : Collection ):
249
+ # TODO: this is a hack to get the space
250
+ # We should update the tests to not pass space via metadata and instead use the collection's
251
+ # configuration_json
252
+ space = None
249
253
if "hnsw:space" in collection .metadata :
250
- return collection .metadata ["hnsw:space" ]
254
+ space = collection .metadata ["hnsw:space" ]
251
255
if collection ._model .configuration_json is None :
252
- return None
256
+ return space
253
257
if 'spann' in collection ._model .configuration_json and collection ._model .configuration_json .get ('spann' ) is not None and 'space' in collection ._model .configuration_json .get ('spann' ):
254
- return collection ._model .configuration_json .get ('spann' ).get ('space' )
258
+ space = collection ._model .configuration_json .get ('spann' ).get ('space' )
255
259
elif 'hnsw' in collection ._model .configuration_json and collection ._model .configuration_json .get ('hnsw' ) is not None and 'space' in collection ._model .configuration_json .get ('hnsw' ):
256
- return collection ._model .configuration_json .get ('hnsw' ).get ('space' )
257
- else :
258
- return None
260
+ if space is None :
261
+ space = collection . _model . configuration_json . get ( 'hnsw' ). get ( 'space' )
262
+ return space
259
263
260
264
def ann_accuracy (
261
265
collection : Collection ,
Original file line number Diff line number Diff line change @@ -2022,6 +2022,11 @@ impl<'me> SpannIndexReader<'me> {
2022
2022
dimensionality : usize ,
2023
2023
ef_search : usize ,
2024
2024
) -> Result < HnswIndexRef , SpannIndexReaderError > {
2025
+ // We take a lock here to synchronize concurrent opens of the same index.
2026
+ // Otherwise, we could end up with a corrupted index since the filesystem
2027
+ // operations are not guaranteed to be atomic.
2028
+ // The lock is a partitioned mutex to allow for higher concurrency across collections.
2029
+ let _guard = hnsw_provider. write_mutex . lock ( id) . await ;
2025
2030
match hnsw_provider. get ( id, cache_key) . await {
2026
2031
Some ( index) => Ok ( index) ,
2027
2032
None => {
You can’t perform that action at this time.
0 commit comments