1
- use super :: client_manager:: NodeNameToClient ;
2
- use super :: { client_manager:: ClientManager , config} ;
1
+ use super :: config;
3
2
use async_trait:: async_trait;
4
3
use backon:: ExponentialBuilder ;
5
4
use backon:: Retryable ;
6
5
use chroma_config:: registry;
7
6
use chroma_config:: { assignment:: assignment_policy:: AssignmentPolicy , Configurable } ;
8
7
use chroma_error:: ChromaError ;
8
+ use chroma_memberlist:: client_manager:: ClientAssigner ;
9
+ use chroma_memberlist:: client_manager:: { ClientManager , ClientOptions } ;
9
10
use chroma_memberlist:: {
10
11
config:: MemberlistProviderConfig ,
11
12
memberlist_provider:: { CustomResourceMemberlistProvider , MemberlistProvider } ,
12
13
} ;
13
14
use chroma_system:: System ;
15
+ use chroma_types:: chroma_proto:: query_executor_client:: QueryExecutorClient ;
14
16
use chroma_types:: SegmentType ;
15
17
use chroma_types:: {
16
- chroma_proto:: query_executor_client:: QueryExecutorClient ,
17
18
operator:: { CountResult , GetResult , KnnBatchResult } ,
18
19
plan:: { Count , Get , Knn } ,
19
- CollectionUuid , ExecutorError ,
20
+ ExecutorError ,
20
21
} ;
21
22
use rand:: seq:: SliceRandom ;
22
- use std:: cmp:: min;
23
23
use tonic:: Request ;
24
24
25
- type Client = QueryExecutorClient < chroma_tracing:: GrpcTraceService < tonic:: transport:: Channel > > ;
25
+ // Convenience type alias for the gRPC query client used by the DistributedExecutor
26
+ type QueryClient = QueryExecutorClient < chroma_tracing:: GrpcTraceService < tonic:: transport:: Channel > > ;
26
27
27
28
/// A distributed executor that routes requests to the appropriate node based on the assignment policy
28
29
/// # Fields
@@ -36,8 +37,7 @@ type Client = QueryExecutorClient<chroma_tracing::GrpcTraceService<tonic::transp
36
37
/// outside.
37
38
#[ derive( Clone , Debug ) ]
38
39
pub struct DistributedExecutor {
39
- node_name_to_client : NodeNameToClient ,
40
- assignment_policy : Box < dyn AssignmentPolicy > ,
40
+ client_assigner : ClientAssigner < QueryClient > ,
41
41
replication_factor : usize ,
42
42
backoff : ExponentialBuilder ,
43
43
}
@@ -50,13 +50,13 @@ impl Configurable<(config::DistributedExecutorConfig, System)> for DistributedEx
50
50
) -> Result < Self , Box < dyn ChromaError > > {
51
51
let assignment_policy =
52
52
Box :: < dyn AssignmentPolicy > :: try_from_config ( & config. assignment , registry) . await ?;
53
- let node_name_to_client = NodeNameToClient :: default ( ) ;
53
+ let client_assigner = ClientAssigner :: new ( assignment_policy , config . replication_factor ) ;
54
54
let client_manager = ClientManager :: new (
55
- node_name_to_client . clone ( ) ,
55
+ client_assigner . clone ( ) ,
56
56
config. connections_per_node ,
57
57
config. connect_timeout_ms ,
58
58
config. request_timeout_ms ,
59
- config. max_query_service_response_size_bytes ,
59
+ ClientOptions :: new ( Some ( config. max_query_service_response_size_bytes ) ) ,
60
60
) ;
61
61
let client_manager_handle = system. start_component ( client_manager) ;
62
62
@@ -75,8 +75,7 @@ impl Configurable<(config::DistributedExecutorConfig, System)> for DistributedEx
75
75
let retry_config = & config. retry ;
76
76
let backoff = retry_config. into ( ) ;
77
77
Ok ( Self {
78
- node_name_to_client,
79
- assignment_policy,
78
+ client_assigner,
80
79
replication_factor : config. replication_factor ,
81
80
backoff,
82
81
} )
@@ -97,7 +96,17 @@ impl DistributedExecutor {
97
96
impl DistributedExecutor {
98
97
///////////////////////// Plan Operations /////////////////////////
99
98
pub async fn count ( & mut self , plan : Count ) -> Result < CountResult , ExecutorError > {
100
- let clients = self . clients ( plan. scan . collection_and_segments . collection . collection_id ) ?;
99
+ let clients = self
100
+ . client_assigner
101
+ . clients (
102
+ & plan
103
+ . scan
104
+ . collection_and_segments
105
+ . collection
106
+ . collection_id
107
+ . to_string ( ) ,
108
+ )
109
+ . map_err ( |e| ExecutorError :: Internal ( e. boxed ( ) ) ) ?;
101
110
let plan: chroma_types:: chroma_proto:: CountPlan = plan. clone ( ) . try_into ( ) ?;
102
111
let res = ( || async {
103
112
choose_client ( clients. as_slice ( ) ) ?
@@ -111,7 +120,17 @@ impl DistributedExecutor {
111
120
}
112
121
113
122
pub async fn get ( & mut self , plan : Get ) -> Result < GetResult , ExecutorError > {
114
- let clients = self . clients ( plan. scan . collection_and_segments . collection . collection_id ) ?;
123
+ let clients = self
124
+ . client_assigner
125
+ . clients (
126
+ & plan
127
+ . scan
128
+ . collection_and_segments
129
+ . collection
130
+ . collection_id
131
+ . to_string ( ) ,
132
+ )
133
+ . map_err ( |e| ExecutorError :: Internal ( e. boxed ( ) ) ) ?;
115
134
let res = ( || async {
116
135
choose_client ( clients. as_slice ( ) ) ?
117
136
. get ( Request :: new ( plan. clone ( ) . try_into ( ) ?) )
@@ -124,7 +143,17 @@ impl DistributedExecutor {
124
143
}
125
144
126
145
pub async fn knn ( & mut self , plan : Knn ) -> Result < KnnBatchResult , ExecutorError > {
127
- let clients = self . clients ( plan. scan . collection_and_segments . collection . collection_id ) ?;
146
+ let clients = self
147
+ . client_assigner
148
+ . clients (
149
+ & plan
150
+ . scan
151
+ . collection_and_segments
152
+ . collection
153
+ . collection_id
154
+ . to_string ( ) ,
155
+ )
156
+ . map_err ( |e| ExecutorError :: Internal ( e. boxed ( ) ) ) ?;
128
157
let res = ( || async {
129
158
choose_client ( clients. as_slice ( ) ) ?
130
159
. knn ( Request :: new ( plan. clone ( ) . try_into ( ) ?) )
@@ -137,38 +166,15 @@ impl DistributedExecutor {
137
166
}
138
167
139
168
pub async fn is_ready ( & self ) -> bool {
140
- !self . node_name_to_client . read ( ) . is_empty ( )
169
+ !self . client_assigner . is_empty ( )
141
170
}
171
+ }
142
172
143
- ///////////////////////// Helpers /////////////////////////
144
-
145
- /// Get the gRPC clients for the given collection id by performing the assignment policy
146
- /// # Arguments
147
- /// - `collection_id` - The collection id for which the client is to be fetched
148
- /// # Returns
149
- /// - The gRPC clients for the given collection id in the order of the assignment policy
150
- /// # Errors
151
- /// - If no client is found for the given collection id
152
- /// - If the assignment policy fails to assign the collection id
153
- fn clients ( & mut self , collection_id : CollectionUuid ) -> Result < Vec < Client > , ExecutorError > {
154
- let node_name_to_client_guard = self . node_name_to_client . read ( ) ;
155
- let members: Vec < String > = node_name_to_client_guard. keys ( ) . cloned ( ) . collect ( ) ;
156
- let target_replication_factor = min ( self . replication_factor , members. len ( ) ) ;
157
- self . assignment_policy . set_members ( members) ;
158
- let assigned = self
159
- . assignment_policy
160
- . assign ( & collection_id. to_string ( ) , target_replication_factor) ?;
161
- let clients = assigned
162
- . iter ( )
163
- . map ( |node_name| {
164
- node_name_to_client_guard
165
- . get ( node_name)
166
- . ok_or_else ( || ExecutorError :: NoClientFound ( node_name. clone ( ) ) )
167
- . cloned ( )
168
- } )
169
- . collect :: < Result < Vec < _ > , _ > > ( ) ?;
170
- Ok ( clients)
171
- }
173
+ fn choose_client ( clients : & [ QueryClient ] ) -> Result < QueryClient , tonic:: Status > {
174
+ Ok ( clients
175
+ . choose ( & mut rand:: thread_rng ( ) )
176
+ . ok_or ( no_clients_found_status ( ) ) ?
177
+ . clone ( ) )
172
178
}
173
179
174
180
fn is_retryable_error ( e : & tonic:: Status ) -> bool {
@@ -181,10 +187,3 @@ fn is_retryable_error(e: &tonic::Status) -> bool {
181
187
/// Builds the gRPC status returned when there is no client to send a request to.
///
/// # Returns
/// - A `tonic::Status` with the internal code and the message "No clients found"
fn no_clients_found_status() -> tonic::Status {
    let message = "No clients found";
    tonic::Status::internal(message)
}
184
-
185
- fn choose_client ( clients : & [ Client ] ) -> Result < Client , tonic:: Status > {
186
- Ok ( clients
187
- . choose ( & mut rand:: thread_rng ( ) )
188
- . ok_or ( no_clients_found_status ( ) ) ?
189
- . clone ( ) )
190
- }
0 commit comments