Skip to content

Commit 0dcd725

Browse files
[ENH] Disable quota and add limit for collection fork (chroma-core#4359)
1 parent 3434401 commit 0dcd725

File tree

3 files changed

+21
-13
lines changed

3 files changed

+21
-13
lines changed

go/pkg/common/errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var (
2626
ErrCollectionVersionInvalid = errors.New("collection version invalid")
2727
ErrCollectionVersionFileNameStale = errors.New("collection version file name stale")
2828
ErrCollectionEntryIsStale = errors.New("collection entry is stale - one of version, version_file_name, or log_position is stale")
29+
ErrCollectionTooManyFork = errors.New("collection entry has too many forks")
2930

3031
// Collection metadata errors
3132
ErrUnknownCollectionMetadataType = errors.New("collection metadata value type not supported")

go/pkg/sysdb/coordinator/table_catalog.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ func (tc *Catalog) ForkCollection(ctx context.Context, forkCollection *model.For
894894
// This scenario could occur during fork because we will reach out to log service first to fork logs. For exampls:
895895
// t0: Fork source collection in log with offset [200, 300] (i.e. compaction offset 200, enumeration offset 300)
896896
// t1: User writes to source collection, compaction takes place, source collection log offset become [400, 500]
897-
// t2: Fork source collection in sysdb, the latest source collection compaction offset is 400. We could not fork this version unless we update the log offsets of the forking collection
897+
// t2: Fork source collection in sysdb, the latest source collection compaction offset is 400. If we add new logs, it will start after offset 300, and the data is lost after compaction.
898898
latestSourceCompactionOffset := uint64(sourceCollection.LogPosition)
899899
if forkCollection.SourceCollectionLogEnumerationOffset < latestSourceCompactionOffset || latestSourceCompactionOffset < forkCollection.SourceCollectionLogCompactionOffset {
900900
return common.ErrCollectionLogPositionStale
@@ -953,6 +953,11 @@ func (tc *Catalog) ForkCollection(ctx context.Context, forkCollection *model.For
953953
if err != nil {
954954
return err
955955
}
956+
// NOTE: This is a temporary hardcoded limit for the size of the lineage file
957+
// TODO: Load the limit value from quota / scorecard, and/or improve the lineage file design to avoid large lineage file
958+
if len(lineageFile.Dependencies) > 1000000 {
959+
return common.ErrCollectionTooManyFork
960+
}
956961
lineageFile.Dependencies = append(lineageFile.Dependencies, &coordinatorpb.CollectionVersionDependency{
957962
SourceCollectionId: sourceCollectionIDStr,
958963
SourceCollectionVersion: uint64(sourceCollection.Version),

rust/frontend/src/server.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,7 @@ pub struct ForkCollectionPayload {
11351135
)
11361136
)]
11371137
async fn fork_collection(
1138-
headers: HeaderMap,
1138+
_headers: HeaderMap,
11391139
Path((tenant, database, collection_id)): Path<(String, String, String)>,
11401140
State(mut server): State<FrontendServer>,
11411141
Json(payload): Json<ForkCollectionPayload>,
@@ -1150,17 +1150,19 @@ async fn fork_collection(
11501150
tracing::info!(
11511151
"Forking collection [{collection_id}] in database [{database}] for tenant [{tenant}]"
11521152
);
1153-
server
1154-
.authenticate_and_authorize(
1155-
&headers,
1156-
AuthzAction::ForkCollection,
1157-
AuthzResource {
1158-
tenant: Some(tenant.clone()),
1159-
database: Some(database.clone()),
1160-
collection: Some(collection_id.clone()),
1161-
},
1162-
)
1163-
.await?;
1153+
// NOTE: The quota check if skipped for fork collection for now, and we rely on the scorecard to limit access to certain tenents
1154+
// TODO: Unify the quota and scorecard
1155+
// server
1156+
// .authenticate_and_authorize(
1157+
// &headers,
1158+
// AuthzAction::ForkCollection,
1159+
// AuthzResource {
1160+
// tenant: Some(tenant.clone()),
1161+
// database: Some(database.clone()),
1162+
// collection: Some(collection_id.clone()),
1163+
// },
1164+
// )
1165+
// .await?;
11641166
let _guard =
11651167
server.scorecard_request(&["op:fork_collection", format!("tenant:{}", tenant).as_str()]);
11661168
let collection_id =

0 commit comments

Comments
 (0)