Skip to content

[ENH] Add a scout-logs function to find the max log position. #4232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions go/pkg/log/repository/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ func (r *LogRepository) GetLastCompactedOffsetForCollection(ctx context.Context,
return
}

// GetBoundsForCollection returns the log-offset bounds for a collection as a
// half-open interval: start is the record compaction offset position and
// limit is the record enumeration offset position plus one (i.e. one past the
// newest enumerated record). On error both bounds are zero and err is set.
func (r *LogRepository) GetBoundsForCollection(ctx context.Context, collectionId string) (start, limit int64, err error) {
	bounds, err := r.queries.GetBoundsForCollection(ctx, collectionId)
	if err != nil {
		trace_log.Error("Error in getting minimum and maximum offset for collection", zap.Error(err), zap.String("collectionId", collectionId))
		return
	}
	start = bounds.RecordCompactionOffsetPosition
	limit = bounds.RecordEnumerationOffsetPosition + 1
	// Named returns: err is already nil here; no explicit reset needed.
	return
}

func (r *LogRepository) GarbageCollection(ctx context.Context) error {
collectionToCompact, err := r.queries.GetAllCollections(ctx)
if err != nil {
Expand Down
17 changes: 17 additions & 0 deletions go/pkg/log/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ func (s *logServer) PushLogs(ctx context.Context, req *logservicepb.PushLogsRequ
return
}

// ScoutLogs reports the limit offset for the requested collection — the upper
// bound of the collection's log offsets as computed by
// LogRepository.GetBoundsForCollection (enumeration offset + 1). The lower
// bound (compaction offset) is intentionally discarded.
func (s *logServer) ScoutLogs(ctx context.Context, req *logservicepb.ScoutLogsRequest) (res *logservicepb.ScoutLogsResponse, err error) {
	var collectionID types.UniqueID
	collectionID, err = types.ToUniqueID(&req.CollectionId)
	if err != nil {
		return
	}
	var limit int64
	_, limit, err = s.lr.GetBoundsForCollection(ctx, collectionID.String())
	if err != nil {
		return
	}
	// limit is already int64; no conversion is needed.
	res = &logservicepb.ScoutLogsResponse{
		LimitOffset: limit,
	}
	return
}

func (s *logServer) PullLogs(ctx context.Context, req *logservicepb.PullLogsRequest) (res *logservicepb.PullLogsResponse, err error) {
var collectionID types.UniqueID
collectionID, err = types.ToUniqueID(&req.CollectionId)
Expand Down
2 changes: 1 addition & 1 deletion go/pkg/log/store/db/copyfrom.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion go/pkg/log/store/db/db.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion go/pkg/log/store/db/models.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion go/pkg/log/store/db/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions go/pkg/log/store/queries/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ SELECT CAST(COALESCE(MIN(r.offset), 0) as bigint) AS min_offset, CAST(COALESCE(M
FROM record_log r
WHERE r.collection_id = $1;

-- Fetch the compaction and enumeration offset positions from a single
-- collection row; callers derive the [start, limit) log-offset bounds
-- from these two values. The :one directive makes sqlc generate a
-- single-row query that errors if the collection id does not exist.
-- name: GetBoundsForCollection :one
SELECT record_compaction_offset_position, record_enumeration_offset_position
FROM collection
WHERE id = $1;

-- name: DeleteCollection :exec
DELETE FROM collection c where c.id = ANY(@collection_ids::text[]);

Expand Down
9 changes: 9 additions & 0 deletions idl/chromadb/proto/logservice.proto
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ message PushLogsResponse {
int32 record_count = 1;
}

// Request to scout a collection's log: asks for the current upper bound of
// log offsets without pulling any records.
message ScoutLogsRequest {
  string collection_id = 1;
}

// NOTE(review): limit_offset appears to be an exclusive bound (one past the
// last record's offset — the Go server returns enumeration offset + 1);
// confirm against all server implementations before relying on this.
message ScoutLogsResponse {
  int64 limit_offset = 1;
}

message PullLogsRequest {
string collection_id = 1;
int64 start_from_offset = 2;
Expand Down Expand Up @@ -68,6 +76,7 @@ message PurgeDirtyForCollectionResponse {

service LogService {
rpc PushLogs(PushLogsRequest) returns (PushLogsResponse) {}
rpc ScoutLogs(ScoutLogsRequest) returns (ScoutLogsResponse) {}
rpc PullLogs(PullLogsRequest) returns (PullLogsResponse) {}
rpc GetAllCollectionInfoToCompact(GetAllCollectionInfoToCompactRequest) returns (GetAllCollectionInfoToCompactResponse) {}
rpc UpdateCollectionLogOffset(UpdateCollectionLogOffsetRequest) returns (UpdateCollectionLogOffsetResponse) {}
Expand Down
28 changes: 26 additions & 2 deletions rust/log-service/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ use chroma_types::chroma_proto::{
log_service_server::LogService, CollectionInfo, GetAllCollectionInfoToCompactRequest,
GetAllCollectionInfoToCompactResponse, LogRecord, OperationRecord, PullLogsRequest,
PullLogsResponse, PurgeDirtyForCollectionRequest, PurgeDirtyForCollectionResponse,
PushLogsRequest, PushLogsResponse, UpdateCollectionLogOffsetRequest,
UpdateCollectionLogOffsetResponse,
PushLogsRequest, PushLogsResponse, ScoutLogsRequest, ScoutLogsResponse,
UpdateCollectionLogOffsetRequest, UpdateCollectionLogOffsetResponse,
};
use chroma_types::CollectionUuid;
use figment::providers::{Env, Format, Yaml};
Expand Down Expand Up @@ -643,6 +643,30 @@ impl LogService for LogServer {
Ok(Response::new(PushLogsResponse { record_count }))
}

#[tracing::instrument(skip(self, request), err(Display))]
async fn scout_logs(
&self,
request: Request<ScoutLogsRequest>,
) -> Result<Response<ScoutLogsResponse>, Status> {
let scout_logs = request.into_inner();
let collection_id = Uuid::parse_str(&scout_logs.collection_id)
.map(CollectionUuid)
.map_err(|_| Status::invalid_argument("Failed to parse collection id"))?;
tracing::info!("Scouting logs for collection {}", collection_id,);
let prefix = storage_prefix_for_log(collection_id);
let log_reader = LogReader::new(
self.config.reader.clone(),
Arc::clone(&self.storage),
prefix,
);
let limit_position = log_reader
.maximum_log_position()
.await
.map_err(|err| Status::new(err.code().into(), err.to_string()))?;
let limit_offset = limit_position.offset() as i64;
Ok(Response::new(ScoutLogsResponse { limit_offset }))
}

#[tracing::instrument(skip(self, request), err(Display))]
async fn pull_logs(
&self,
Expand Down
26 changes: 26 additions & 0 deletions rust/log/src/grpc_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ use uuid::Uuid;
pub enum GrpcPullLogsError {
#[error("Failed to fetch")]
FailedToPullLogs(#[from] tonic::Status),
#[error("Failed to scout logs: {0}")]
FailedToScoutLogs(tonic::Status),
#[error("Failed to convert proto embedding record into EmbeddingRecord")]
ConversionError(#[from] RecordConversionError),
}
Expand All @@ -26,6 +28,7 @@ impl ChromaError for GrpcPullLogsError {
fn code(&self) -> ErrorCodes {
match self {
GrpcPullLogsError::FailedToPullLogs(err) => err.code().into(),
GrpcPullLogsError::FailedToScoutLogs(err) => err.code().into(),
GrpcPullLogsError::ConversionError(_) => ErrorCodes::Internal,
}
}
Expand Down Expand Up @@ -194,6 +197,29 @@ impl GrpcLog {
&mut self.client
}

/// Ask the remote log service for the collection's limit offset via the
/// ScoutLogs RPC. `starting_offset` is only used for the informational log
/// line; the returned value comes entirely from the server.
pub(super) async fn scout_logs(
    &mut self,
    collection_id: CollectionUuid,
    starting_offset: u64,
) -> Result<u64, Box<dyn ChromaError>> {
    let proto_request = chroma_proto::ScoutLogsRequest {
        collection_id: collection_id.0.to_string(),
    };
    match self.client_for(collection_id).scout_logs(proto_request).await {
        Ok(response) => {
            let scout = response.into_inner();
            tracing::info!("scout logs: {} -> {}", starting_offset, scout.limit_offset);
            Ok(scout.limit_offset as u64)
        }
        Err(err) => {
            tracing::error!("Failed to scout logs: {}", err);
            Err(Box::new(GrpcPullLogsError::FailedToScoutLogs(err)))
        }
    }
}

pub(super) async fn read(
&mut self,
collection_id: CollectionUuid,
Expand Down
26 changes: 24 additions & 2 deletions rust/log/src/in_memory_log.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use crate::types::CollectionInfo;
use chroma_types::{CollectionUuid, LogRecord};
use std::collections::HashMap;
use std::fmt::Debug;

use chroma_error::ChromaError;
use chroma_types::{CollectionUuid, LogRecord};

use crate::types::CollectionInfo;

// This is used for testing only, it represents a log record that is stored in memory
// internal to a mock log implementation
#[derive(Clone)]
Expand Down Expand Up @@ -123,6 +126,25 @@ impl InMemoryLog {
) {
self.offsets.insert(collection_id, new_offset);
}

/// Return one past the largest log offset recorded for `collection_id`,
/// never less than `starting_offset`. A missing or empty collection yields
/// `starting_offset` unchanged.
pub(super) async fn scout_logs(
    &mut self,
    collection_id: CollectionUuid,
    starting_offset: u64,
) -> Result<u64, Box<dyn ChromaError>> {
    let max_seen = self
        .collection_to_log
        .get(&collection_id)
        .and_then(|records| records.iter().map(|rec| rec.log_offset + 1).max())
        .unwrap_or(starting_offset as i64) as u64;
    // Clamp so the reported bound never moves backwards past the caller's floor.
    Ok(max_seen.max(starting_offset))
}
}

impl Default for InMemoryLog {
Expand Down
22 changes: 22 additions & 0 deletions rust/log/src/log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ impl Log {
}
}

/// Dispatch a log scout — an upper-bound query on the collection's log
/// offsets — to whichever backend this `Log` wraps.
#[tracing::instrument(skip(self))]
pub async fn scout_logs(
    &mut self,
    collection_id: CollectionUuid,
    starting_offset: u64,
) -> Result<u64, Box<dyn ChromaError>> {
    match self {
        // Sqlite returns a concrete error type, so box it here.
        Log::Sqlite(log) => log
            .scout_logs(collection_id, starting_offset as i64)
            .await
            .map_err(|e| Box::new(e) as Box<dyn ChromaError>),
        // Grpc and InMemory already return Box<dyn ChromaError>;
        // re-boxing would wrap the error twice for no benefit.
        Log::Grpc(log) => log.scout_logs(collection_id, starting_offset).await,
        Log::InMemory(log) => log.scout_logs(collection_id, starting_offset).await,
    }
}

#[tracing::instrument(skip(self, records))]
pub async fn push_logs(
&mut self,
Expand Down
13 changes: 13 additions & 0 deletions rust/log/src/sqlite_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub enum SqlitePullLogsError {
InvalidEmbedding(bytemuck::PodCastError),
#[error("Failed to parse metadata: {0}")]
InvalidMetadata(#[from] serde_json::Error),
#[error("Method {0} is not implemented")]
NotImplemented(String),
}

impl ChromaError for SqlitePullLogsError {
Expand All @@ -36,6 +38,7 @@ impl ChromaError for SqlitePullLogsError {
SqlitePullLogsError::InvalidEncoding(_) => ErrorCodes::InvalidArgument,
SqlitePullLogsError::InvalidEmbedding(_) => ErrorCodes::InvalidArgument,
SqlitePullLogsError::InvalidMetadata(_) => ErrorCodes::InvalidArgument,
SqlitePullLogsError::NotImplemented(_) => ErrorCodes::Internal,
}
}
}
Expand Down Expand Up @@ -162,6 +165,16 @@ impl SqliteLog {
.map_err(|_| SqlitePushLogsError::CompactorHandleSetError)
}

/// Scouting is not supported by the sqlite-backed log; this stub always
/// fails with `NotImplemented` naming the method.
pub(super) async fn scout_logs(
    &mut self,
    _collection_id: CollectionUuid,
    _starting_offset: i64,
) -> Result<u64, SqlitePullLogsError> {
    let method = String::from("scout_logs");
    Err(SqlitePullLogsError::NotImplemented(method))
}

pub(super) async fn read(
&mut self,
collection_id: CollectionUuid,
Expand Down
Loading
Loading