@@ -25,6 +25,7 @@ import (
 
 	"github.com/RoaringBitmap/roaring/v2"
 	faiss "github.com/blevesearch/go-faiss"
+	segment "github.com/blevesearch/scorch_segment_api/v2"
 )
 
 func newVectorIndexCache() *vectorIndexCache {
@@ -56,17 +57,17 @@ func (vc *vectorIndexCache) Clear() {
 // present. It also returns the batch executor for the field if it's present in the
 // cache.
 func (vc *vectorIndexCache) loadOrCreate(fieldID uint16, mem []byte,
-	loadDocVecIDMap bool, except *roaring.Bitmap) (
+	loadDocVecIDMap bool, except *roaring.Bitmap, options segment.InterpretVectorIndexOptions) (
 	index *faiss.IndexImpl, vecDocIDMap map[int64]uint32, docVecIDMap map[uint32][]int64,
-	vecIDsToExclude []int64, err error) {
+	vecIDsToExclude []int64, batchExec *batchExecutor, err error) {
 	vc.m.RLock()
 	entry, ok := vc.cache[fieldID]
 	if ok {
-		index, vecDocIDMap, docVecIDMap = entry.load()
+		index, vecDocIDMap, docVecIDMap, batchExec = entry.load()
 		vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except)
 		if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 {
 			vc.m.RUnlock()
-			return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
+			return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, nil
 		}
 
 		vc.m.RUnlock()
@@ -76,14 +77,14 @@ func (vc *vectorIndexCache) loadOrCreate(fieldID uint16, mem []byte,
 		// typically seen for the first filtered query.
 		docVecIDMap = vc.addDocVecIDMapToCacheLOCKED(entry)
 		vc.m.Unlock()
-		return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
+		return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, nil
 	}
 
 	vc.m.RUnlock()
 	// acquiring a lock since this is modifying the cache.
 	vc.m.Lock()
 	defer vc.m.Unlock()
-	return vc.createAndCacheLOCKED(fieldID, mem, loadDocVecIDMap, except)
+	return vc.createAndCacheLOCKED(fieldID, mem, loadDocVecIDMap, except, options)
 }
 
 func (vc *vectorIndexCache) addDocVecIDMapToCacheLOCKED(ce *cacheEntry) map[uint32][]int64 {
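With the widened signature above, a caller passes the interpret options through and receives the per-field batch executor alongside the index and ID maps. A minimal sketch of a call site, assuming it runs inside the same package; searchFieldSketch, fieldID, vecIndexMem, deletedDocs, and opts are illustrative names, not identifiers from this change:

// Illustrative helper, not part of this change.
func (vc *vectorIndexCache) searchFieldSketch(fieldID uint16, vecIndexMem []byte,
	deletedDocs *roaring.Bitmap, opts segment.InterpretVectorIndexOptions) error {
	index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, err :=
		vc.loadOrCreate(fieldID, vecIndexMem, true /* loadDocVecIDMap */, deletedDocs, opts)
	if err != nil {
		return err
	}
	// index and the two ID maps are shared cache state; batchExec is the
	// executor that coalesces concurrent searches against this field.
	_, _, _, _, _ = index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec
	return nil
}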
@@ -104,21 +105,22 @@ func (vc *vectorIndexCache) addDocVecIDMapToCacheLOCKED(ce *cacheEntry) map[uint
 
 // Rebuilding the cache on a miss.
 func (vc *vectorIndexCache) createAndCacheLOCKED(fieldID uint16, mem []byte,
-	loadDocVecIDMap bool, except *roaring.Bitmap) (
+	loadDocVecIDMap bool, except *roaring.Bitmap, options segment.InterpretVectorIndexOptions) (
 	index *faiss.IndexImpl, vecDocIDMap map[int64]uint32,
-	docVecIDMap map[uint32][]int64, vecIDsToExclude []int64, err error) {
+	docVecIDMap map[uint32][]int64, vecIDsToExclude []int64,
+	batchExec *batchExecutor, err error) {
 
 	// Handle concurrent accesses (to avoid unnecessary work) by adding a
 	// check within the write lock here.
 	entry := vc.cache[fieldID]
 	if entry != nil {
-		index, vecDocIDMap, docVecIDMap = entry.load()
+		index, vecDocIDMap, docVecIDMap, batchExec = entry.load()
 		vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except)
 		if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 {
-			return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
+			return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, nil
 		}
 		docVecIDMap = vc.addDocVecIDMapToCacheLOCKED(entry)
-		return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
+		return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, nil
 	}
 
 	// if the cache doesn't have the entry, construct the vector to doc id map and
@@ -154,16 +156,17 @@ func (vc *vectorIndexCache) createAndCacheLOCKED(fieldID uint16, mem []byte,
 
 	index, err = faiss.ReadIndexFromBuffer(mem[pos:pos+int(indexSize)], faissIOFlags)
 	if err != nil {
-		return nil, nil, nil, nil, err
+		return nil, nil, nil, nil, nil, err
 	}
 
-	vc.insertLOCKED(fieldID, index, vecDocIDMap, loadDocVecIDMap, docVecIDMap)
-	return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
+	batchExec = newBatchExecutor(options)
+	vc.insertLOCKED(fieldID, index, vecDocIDMap, loadDocVecIDMap, docVecIDMap, batchExec)
+	return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, batchExec, nil
 }
 
 func (vc *vectorIndexCache) insertLOCKED(fieldIDPlus1 uint16,
 	index *faiss.IndexImpl, vecDocIDMap map[int64]uint32, loadDocVecIDMap bool,
-	docVecIDMap map[uint32][]int64) {
+	docVecIDMap map[uint32][]int64, batchExec *batchExecutor) {
 	// the first time we've hit the cache, try to spawn a monitoring routine
 	// which will reconcile the moving averages for all the fields being hit
 	if len(vc.cache) == 0 {
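batchExecutor itself is defined elsewhere in this change; only its construction, sharing, and shutdown are visible in this file. Going strictly by that surface (newBatchExecutor(options) on a cache miss, one executor stored per cache entry, close() on eviction), a minimal sketch of the shape this file assumes:

// Sketch only: the real batchExecutor lives in another file of this change.
// This mirrors just the API surface used here.
type batchExecutorSketch struct {
	options segment.InterpretVectorIndexOptions // search knobs captured at creation
	closeCh chan struct{}                       // signals any worker goroutine to stop
}

func newBatchExecutorSketch(options segment.InterpretVectorIndexOptions) *batchExecutorSketch {
	return &batchExecutorSketch{
		options: options,
		closeCh: make(chan struct{}),
	}
}

func (b *batchExecutorSketch) close() {
	close(b.closeCh) // idempotence is the real type's concern; this sketch assumes one call
}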
@@ -178,7 +181,7 @@ func (vc *vectorIndexCache) insertLOCKED(fieldIDPlus1 uint16,
 		// longer time and thereby the index to be resident in the cache
 		// for longer time.
 		vc.cache[fieldIDPlus1] = createCacheEntry(index, vecDocIDMap,
-			loadDocVecIDMap, docVecIDMap, 0.4)
+			loadDocVecIDMap, docVecIDMap, 0.4, batchExec)
 	}
 }
 
@@ -272,15 +275,17 @@ func (e *ewma) add(val uint64) {
 // -----------------------------------------------------------------------------
 
 func createCacheEntry(index *faiss.IndexImpl, vecDocIDMap map[int64]uint32,
-	loadDocVecIDMap bool, docVecIDMap map[uint32][]int64, alpha float64) *cacheEntry {
+	loadDocVecIDMap bool, docVecIDMap map[uint32][]int64, alpha float64,
+	batchExec *batchExecutor) *cacheEntry {
 	ce := &cacheEntry{
 		index:       index,
 		vecDocIDMap: vecDocIDMap,
 		tracker: &ewma{
 			alpha:  alpha,
 			sample: 1,
 		},
-		refs: 1,
+		refs:      1,
+		batchExec: batchExec,
 	}
 	if loadDocVecIDMap {
 		ce.docVecIDMap = docVecIDMap
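The tracker fields set here (alpha, plus an initial sample of 1) feed the ewma type whose add method appears in the hunk header; its update rule is outside these hunks. Assuming the standard exponentially weighted moving average, the effect of the 0.4 alpha chosen above is roughly:

// Assumed EWMA update, for illustration; the actual (e *ewma) add
// implementation is not shown in this diff.
func (e *ewma) addSketch(val uint64) {
	// A sub-0.5 alpha favors history over the newest observation, so a
	// field that keeps getting hit holds a high sample and its index
	// stays resident in the cache longer.
	e.sample = uint64(e.alpha*float64(val) + (1-e.alpha)*float64(e.sample))
}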
@@ -299,6 +304,8 @@ type cacheEntry struct {
 	index       *faiss.IndexImpl
 	vecDocIDMap map[int64]uint32
 	docVecIDMap map[uint32][]int64
+
+	batchExec *batchExecutor
 }
 
 func (ce *cacheEntry) incHit() {
@@ -313,10 +320,14 @@ func (ce *cacheEntry) decRef() {
 	atomic.AddInt64(&ce.refs, -1)
 }
 
-func (ce *cacheEntry) load() (*faiss.IndexImpl, map[int64]uint32, map[uint32][]int64) {
+func (ce *cacheEntry) load() (
+	*faiss.IndexImpl,
+	map[int64]uint32,
+	map[uint32][]int64,
+	*batchExecutor) {
 	ce.incHit()
 	ce.addRef()
-	return ce.index, ce.vecDocIDMap, ce.docVecIDMap
+	return ce.index, ce.vecDocIDMap, ce.docVecIDMap, ce.batchExec
 }
 
 func (ce *cacheEntry) close() {
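load continues the existing reference-counting discipline while handing back the executor: it bumps the hit counter and takes a reference on the caller's behalf, so every load must still be paired with a decRef once the search that borrowed the index (and now the batch executor) completes. An illustrative pairing, with withEntrySketch an assumed name:

// Illustrative only; the real callers are in the segment search path.
func withEntrySketch(entry *cacheEntry) {
	index, vecDocIDMap, docVecIDMap, batchExec := entry.load() // incHit + addRef
	defer entry.decRef() // release once the (possibly batched) search is done
	_, _, _, _ = index, vecDocIDMap, docVecIDMap, batchExec
}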
@@ -325,6 +336,7 @@ func (ce *cacheEntry) close() {
 		ce.index = nil
 		ce.vecDocIDMap = nil
 		ce.docVecIDMap = nil
+		ce.batchExec.close()
 	}()
 }
 