5
5
"encoding/json"
6
6
"errors"
7
7
"fmt"
8
- "slices"
9
8
"time"
10
9
11
10
"github.com/chroma-core/chroma/go/pkg/common"
@@ -292,19 +291,15 @@ func (tc *Catalog) createCollectionImpl(txCtx context.Context, createCollection
292
291
}
293
292
294
293
dbCollection := & dbmodel.Collection {
295
- ID : createCollection .ID .String (),
296
- Name : & createCollection .Name ,
297
- ConfigurationJsonStr : & createCollection .ConfigurationJsonStr ,
298
- Dimension : createCollection .Dimension ,
299
- DatabaseID : databases [0 ].ID ,
300
- VersionFileName : versionFileName ,
301
- Tenant : createCollection .TenantID ,
302
- Ts : ts ,
303
- LogPosition : createCollection .LogPosition ,
304
- RootCollectionId : createCollection .RootCollectionId ,
305
- TotalRecordsPostCompaction : createCollection .TotalRecordsPostCompaction ,
306
- SizeBytesPostCompaction : createCollection .SizeBytesPostCompaction ,
307
- LastCompactionTimeSecs : createCollection .LastCompactionTimeSecs ,
294
+ ID : createCollection .ID .String (),
295
+ Name : & createCollection .Name ,
296
+ ConfigurationJsonStr : & createCollection .ConfigurationJsonStr ,
297
+ Dimension : createCollection .Dimension ,
298
+ DatabaseID : databases [0 ].ID ,
299
+ Ts : ts ,
300
+ LogPosition : 0 ,
301
+ VersionFileName : versionFileName ,
302
+ Tenant : createCollection .TenantID ,
308
303
}
309
304
310
305
err = tc .metaDomain .CollectionDb (txCtx ).Insert (dbCollection )
@@ -471,7 +466,7 @@ func (tc *Catalog) GetCollectionWithSegments(ctx context.Context, collectionID t
471
466
var segments []* model.Segment
472
467
473
468
err := tc .txImpl .Transaction (ctx , func (txCtx context.Context ) error {
474
- collections , e := tc .GetCollections (txCtx , collectionID , nil , "" , "" , nil , nil )
469
+ collections , e := tc .GetCollections (ctx , collectionID , nil , "" , "" , nil , nil )
475
470
if e != nil {
476
471
return e
477
472
}
@@ -483,7 +478,7 @@ func (tc *Catalog) GetCollectionWithSegments(ctx context.Context, collectionID t
483
478
}
484
479
collection = collections [0 ]
485
480
486
- segments , e = tc .GetSegments (txCtx , types .NilUniqueID (), nil , nil , collectionID )
481
+ segments , e = tc .GetSegments (ctx , types .NilUniqueID (), nil , nil , collectionID )
487
482
if e != nil {
488
483
return e
489
484
}
@@ -818,179 +813,23 @@ func (tc *Catalog) UpdateCollection(ctx context.Context, updateCollection *model
818
813
return result , nil
819
814
}
820
815
821
- func (tc * Catalog ) getLineageFile (ctx context.Context , collection * model.Collection ) (* coordinatorpb.CollectionLineageFile , error ) {
822
- if len (collection .LineageFileName ) == 0 {
823
- // There is no lineage file for the given collection
824
- return & coordinatorpb.CollectionLineageFile {
825
- Dependencies : []* coordinatorpb.CollectionVersionDependency {},
826
- }, nil
827
- }
828
-
829
- return tc .s3Store .GetLineageFile (collection .LineageFileName )
830
- }
831
-
832
816
func (tc * Catalog ) ForkCollection (ctx context.Context , forkCollection * model.ForkCollection ) (* model.Collection , []* model.Segment , error ) {
833
817
log .Info ("Forking collection" , zap .String ("sourceCollectionId" , forkCollection .SourceCollectionID .String ()), zap .String ("targetCollectionName" , forkCollection .TargetCollectionName ))
834
818
819
+ var source_collection * model.Collection
820
+ var source_segments []* model.Segment
821
+
835
822
err := tc .txImpl .Transaction (ctx , func (txCtx context.Context ) error {
836
823
var err error
837
- var rootCollection * model.Collection
838
- var rootCollectionID types.UniqueID
839
- var rootCollectionIDStr string
840
- var sourceCollection * model.Collection
841
- var sourceSegments []* model.Segment
842
- var newLineageFileFullName string
843
-
844
- ts := time .Now ().UTC ()
845
-
846
- sourceCollectionIDStr := forkCollection .SourceCollectionID .String ()
847
-
848
- // NOTE: We need to retrieve the source collection to get root collection id, then acquire locks on source and root collections in order to avoid deadlock.
849
- // This step is because root collection id is always populated when the collection is created and is never modified.
850
- sourceCollectionDb , err := tc .metaDomain .CollectionDb (txCtx ).GetCollectionEntry (& sourceCollectionIDStr , nil )
851
- if err != nil {
852
- return err
853
- }
854
-
855
- if len (sourceCollectionDb .RootCollectionId ) > 0 {
856
- rootCollectionID , err = types .Parse (sourceCollectionDb .RootCollectionId )
857
- if err != nil {
858
- return err
859
- }
860
- } else {
861
- rootCollectionID = forkCollection .SourceCollectionID
862
- }
863
- rootCollectionIDStr = rootCollectionID .String ()
864
-
865
- // Lock source and root collections in order
866
- collectionsToLock := []string {sourceCollectionIDStr }
867
- if rootCollectionID != forkCollection .SourceCollectionID {
868
- collectionsToLock = append (collectionsToLock , rootCollectionIDStr )
869
- slices .Sort (collectionsToLock )
870
- }
871
- for _ , collectionID := range collectionsToLock {
872
- err = tc .metaDomain .CollectionDb (txCtx ).LockCollection (collectionID )
873
- if err != nil {
874
- return err
875
- }
876
- }
877
-
878
- // Get source and root collections after they are locked
879
- sourceCollection , sourceSegments , err = tc .GetCollectionWithSegments (txCtx , forkCollection .SourceCollectionID )
880
- if err != nil {
881
- return err
882
- }
883
- if rootCollectionID != forkCollection .SourceCollectionID {
884
- limit := int32 (1 )
885
- collections , err := tc .GetCollections (txCtx , rootCollectionID , nil , "" , "" , & limit , nil )
886
- if err != nil {
887
- return err
888
- }
889
- if len (collections ) == 0 {
890
- return common .ErrCollectionNotFound
891
- }
892
- rootCollection = collections [0 ]
893
- } else {
894
- rootCollection = sourceCollection
895
- }
896
- databases , err := tc .metaDomain .DatabaseDb (txCtx ).GetDatabases (sourceCollection .TenantID , sourceCollection .DatabaseName )
897
- if err != nil {
898
- return err
899
- }
900
- if len (databases ) == 0 {
901
- return common .ErrDatabaseNotFound
902
- }
903
-
904
- databaseID := databases [0 ].ID
905
-
906
- // Verify that the source collection log position is between the compaction offset (inclusive) and enumeration offset (inclusive)
907
- // This check is necessary for next compaction to fetch the right logs
908
- // This scenario could occur during fork because we will reach out to log service first to fork logs. For example:
909
- // t0: Fork source collection in log with offset [200, 300] (i.e. compaction offset 200, enumeration offset 300)
910
- // t1: User writes to source collection, compaction takes place, source collection log offset become [400, 500]
911
- // t2: Fork source collection in sysdb, the latest source collection compaction offset is 400. If we add new logs, it will start after offset 300, and the data is lost after compaction.
912
- latestSourceCompactionOffset := uint64 (sourceCollection .LogPosition )
913
- if forkCollection .SourceCollectionLogEnumerationOffset < latestSourceCompactionOffset || latestSourceCompactionOffset < forkCollection .SourceCollectionLogCompactionOffset {
914
- return common .ErrCollectionLogPositionStale
915
- }
916
-
917
- // Create the new collection with source collection information
918
- createCollection := & model.CreateCollection {
919
- ID : forkCollection .TargetCollectionID ,
920
- Name : forkCollection .TargetCollectionName ,
921
- ConfigurationJsonStr : sourceCollection .ConfigurationJsonStr ,
922
- Dimension : sourceCollection .Dimension ,
923
- Metadata : sourceCollection .Metadata ,
924
- GetOrCreate : false ,
925
- TenantID : sourceCollection .TenantID ,
926
- DatabaseName : sourceCollection .DatabaseName ,
927
- Ts : ts .Unix (),
928
- LogPosition : sourceCollection .LogPosition ,
929
- RootCollectionId : rootCollectionIDStr ,
930
- TotalRecordsPostCompaction : sourceCollection .TotalRecordsPostCompaction ,
931
- SizeBytesPostCompaction : sourceCollection .SizeBytesPostCompaction ,
932
- LastCompactionTimeSecs : sourceCollection .LastCompactionTimeSecs ,
933
- }
934
-
935
- createSegments := []* model.CreateSegment {}
936
- flushFilePaths := []* model.FlushSegmentCompaction {}
937
- for _ , segment := range sourceSegments {
938
- newSegmentID := types .NewUniqueID ()
939
- createSegment := & model.CreateSegment {
940
- ID : newSegmentID ,
941
- Type : segment .Type ,
942
- Scope : segment .Scope ,
943
- CollectionID : forkCollection .TargetCollectionID ,
944
- Metadata : segment .Metadata ,
945
- Ts : ts .Unix (),
946
- }
947
- createSegments = append (createSegments , createSegment )
948
- flushFilePath := & model.FlushSegmentCompaction {
949
- ID : newSegmentID ,
950
- FilePaths : segment .FilePaths ,
951
- }
952
- flushFilePaths = append (flushFilePaths , flushFilePath )
953
- }
954
-
955
- _ , _ , err = tc .CreateCollectionAndSegments (txCtx , createCollection , createSegments , ts .Unix ())
956
- if err != nil {
957
- return err
958
- }
959
-
960
- err = tc .metaDomain .SegmentDb (txCtx ).RegisterFilePaths (flushFilePaths )
961
- if err != nil {
962
- return err
963
- }
964
-
965
- // Update the lineage file
966
- lineageFile , err := tc .getLineageFile (txCtx , rootCollection )
967
- if err != nil {
968
- return err
969
- }
970
- // NOTE: This is a temporary hardcoded limit for the size of the lineage file
971
- // TODO: Load the limit value from quota / scorecard, and/or improve the lineage file design to avoid large lineage file
972
- if len (lineageFile .Dependencies ) > 1000000 {
973
- return common .ErrCollectionTooManyFork
974
- }
975
- lineageFile .Dependencies = append (lineageFile .Dependencies , & coordinatorpb.CollectionVersionDependency {
976
- SourceCollectionId : sourceCollectionIDStr ,
977
- SourceCollectionVersion : uint64 (sourceCollection .Version ),
978
- TargetCollectionId : forkCollection .TargetCollectionID .String (),
979
- })
980
-
981
- newLineageFileBaseName := fmt .Sprintf ("%s/%d/%s.binpb" , sourceCollectionIDStr , sourceCollection .Version , forkCollection .TargetCollectionID )
982
- newLineageFileFullName , err = tc .s3Store .PutLineageFile (rootCollection .TenantID , databaseID , rootCollectionIDStr , newLineageFileBaseName , lineageFile )
983
- if err != nil {
984
- return err
985
- }
986
-
987
- return tc .metaDomain .CollectionDb (txCtx ).UpdateCollectionLineageFilePath (rootCollectionIDStr , rootCollection .LineageFileName , newLineageFileFullName )
824
+ source_collection , source_segments , err = tc .GetCollectionWithSegments (ctx , forkCollection .SourceCollectionID )
825
+ return err
988
826
})
989
827
if err != nil {
990
828
return nil , nil , err
991
829
}
992
830
993
- return tc .GetCollectionWithSegments (ctx , forkCollection .TargetCollectionID )
831
+ // TODO: Implement forking logic
832
+ return source_collection , source_segments , nil
994
833
}
995
834
996
835
func (tc * Catalog ) CreateSegment (ctx context.Context , createSegment * model.CreateSegment , ts types.Timestamp ) (* model.Segment , error ) {
@@ -1309,7 +1148,7 @@ func (tc *Catalog) ListCollectionVersions(ctx context.Context,
1309
1148
zap .Int64 ("version" , int64 (collectionEntry .Version )),
1310
1149
zap .String ("version_file_name" , collectionEntry .VersionFileName ))
1311
1150
1312
- versionFile , err := tc .s3Store .GetVersionFile (collectionEntry .VersionFileName )
1151
+ versionFile , err := tc .s3Store .GetVersionFile (tenantID , collectionID . String (), int64 ( collectionEntry . Version ), collectionEntry .VersionFileName )
1313
1152
if err != nil {
1314
1153
log .Error ("error getting version file" , zap .Error (err ))
1315
1154
return nil , err
@@ -1550,7 +1389,7 @@ func (tc *Catalog) FlushCollectionCompactionForVersionedCollection(ctx context.C
1550
1389
}
1551
1390
} else {
1552
1391
// Read the VersionFile from S3MetaStore.
1553
- existingVersionFilePb , err = tc .s3Store .GetVersionFile (existingVersionFileName )
1392
+ existingVersionFilePb , err = tc .s3Store .GetVersionFile (flushCollectionCompaction . TenantID , flushCollectionCompaction . ID . String (), existingVersion , existingVersionFileName )
1554
1393
if err != nil {
1555
1394
return nil , err
1556
1395
}
@@ -1745,7 +1584,7 @@ func (tc *Catalog) markVersionForDeletionInSingleCollection(
1745
1584
// TODO(rohit): log error if collection in file is different from the one in request.
1746
1585
1747
1586
existingVersionFileName := collectionEntry .VersionFileName
1748
- versionFilePb , err := tc .s3Store .GetVersionFile (existingVersionFileName )
1587
+ versionFilePb , err := tc .s3Store .GetVersionFile (tenantID , collectionID , int64 ( collectionEntry . Version ), existingVersionFileName )
1749
1588
if err != nil {
1750
1589
return err
1751
1590
}
@@ -1869,7 +1708,7 @@ func (tc *Catalog) DeleteVersionEntriesForCollection(ctx context.Context, tenant
1869
1708
}
1870
1709
1871
1710
existingVersionFileName := collectionEntry .VersionFileName
1872
- versionFilePb , err := tc .s3Store .GetVersionFile (existingVersionFileName )
1711
+ versionFilePb , err := tc .s3Store .GetVersionFile (tenantID , collectionID , int64 ( collectionEntry . Version ), existingVersionFileName )
1873
1712
if err != nil {
1874
1713
return err
1875
1714
}
0 commit comments