
Commit 205f96c

fix: use status code 400 when batch is empty (#413)
1 parent 416efe1 commit 205f96c

5 files changed: +46 -10 lines


.github/workflows/build.yaml

Lines changed: 2 additions & 0 deletions
@@ -127,6 +127,7 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta-grpc
+        if: ${{ matrix.grpc }}
        uses: docker/metadata-action@v5
        with:
          images: |
@@ -142,6 +143,7 @@ jobs:
 
      - name: Build and push Docker image
        id: build-and-push-grpc
+       if: ${{ matrix.grpc }}
        uses: docker/build-push-action@v6
        with:
          context: .

.github/workflows/matrix.json

Lines changed: 7 additions & 0 deletions
@@ -6,6 +6,7 @@
     "sccache": true,
     "cudaComputeCap": 75,
     "extraBuildArgs": "DEFAULT_USE_FLASH_ATTENTION=False",
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -14,6 +15,7 @@
     "runOn": "always",
     "sccache": true,
     "cudaComputeCap": 80,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -22,6 +24,7 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 86,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -30,6 +33,7 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 89,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -38,20 +42,23 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 90,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
     "name": "All",
     "imageNamePrefix": "cuda-",
     "runOn": "main",
     "sccache": false,
+    "grpc": false,
     "dockerfile": "Dockerfile-cuda-all"
   },
   {
     "name": "cpu",
     "imageNamePrefix": "cpu-",
     "runOn": "main",
     "sccache": true,
+    "grpc": true,
     "dockerfile": "Dockerfile"
   }
 ]

router/src/grpc/server.rs

Lines changed: 1 addition & 0 deletions
@@ -1534,6 +1534,7 @@ impl From<ErrorResponse> for Status {
             ErrorType::Overloaded => Code::ResourceExhausted,
             ErrorType::Validation => Code::InvalidArgument,
             ErrorType::Tokenizer => Code::FailedPrecondition,
+            ErrorType::Empty => Code::InvalidArgument,
         };
 
         Status::new(code, value.error)
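
For context, here is a self-contained sketch of the conversion this hunk touches, assuming tonic's Code/Status. The ErrorType and ErrorResponse below are simplified stand-ins carrying only the variants visible in this commit (the real enum in router/src/lib.rs has more), so this is illustrative rather than the exact file contents.

use tonic::{Code, Status};

enum ErrorType {
    Overloaded,
    Validation,
    Tokenizer,
    Empty,
}

struct ErrorResponse {
    error: String,
    error_type: ErrorType,
}

impl From<ErrorResponse> for Status {
    fn from(value: ErrorResponse) -> Self {
        let code = match value.error_type {
            ErrorType::Overloaded => Code::ResourceExhausted,
            ErrorType::Validation => Code::InvalidArgument,
            ErrorType::Tokenizer => Code::FailedPrecondition,
            // An empty batch is a caller mistake, so it surfaces as
            // InvalidArgument rather than a server-side error code.
            ErrorType::Empty => Code::InvalidArgument,
        };
        Status::new(code, value.error)
    }
}

fn main() {
    let status: Status = ErrorResponse {
        error: "Batch is empty".to_string(),
        error_type: ErrorType::Empty,
    }
    .into();
    // Prints "InvalidArgument" and the message carried over from the response.
    println!("{:?}: {}", status.code(), status.message());
}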

router/src/http/server.rs

Lines changed: 35 additions & 10 deletions
@@ -89,6 +89,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -285,6 +287,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -306,7 +310,7 @@ async fn rerank(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -471,6 +475,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -489,7 +495,7 @@ async fn similarity(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -553,6 +559,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -615,7 +623,7 @@ async fn embed(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -722,6 +730,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -792,7 +802,7 @@ async fn embed_sparse(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -900,6 +910,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -961,7 +973,7 @@ async fn embed_all(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -1067,6 +1079,8 @@ example = json ! ({"message": "Inference failed", "type": "backend"})),
 example = json ! ({"message": "Model is overloaded", "type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = OpenAICompatErrorResponse,
 example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = OpenAICompatErrorResponse,
+example = json ! ({"message": "Batch is empty", "type": "empty"})),
 (status = 413, description = "Batch size error", body = OpenAICompatErrorResponse,
 example = json ! ({"message": "Batch size error", "type": "validation"})),
 )
@@ -1150,7 +1164,7 @@ async fn openai_embed(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -1265,8 +1279,12 @@ path = "/tokenize",
 request_body = TokenizeRequest,
 responses(
 (status = 200, description = "Tokenized ids", body = TokenizeResponse),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
+(status = 413, description = "Batch size error", body = ErrorResponse,
+example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
-example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
 )
 )]
 #[instrument(skip_all)]
@@ -1327,7 +1345,7 @@ async fn tokenize(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -1377,8 +1395,12 @@ path = "/decode",
 request_body = DecodeRequest,
 responses(
 (status = 200, description = "Decoded ids", body = DecodeResponse),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
+(status = 413, description = "Batch size error", body = ErrorResponse,
+example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
-example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
 )
 )]
 #[instrument(skip_all)]
@@ -1403,7 +1425,7 @@ async fn decode(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -1454,6 +1476,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -1804,6 +1828,7 @@ impl From<&ErrorType> for StatusCode {
             ErrorType::Overloaded => StatusCode::TOO_MANY_REQUESTS,
             ErrorType::Tokenizer => StatusCode::UNPROCESSABLE_ENTITY,
             ErrorType::Validation => StatusCode::PAYLOAD_TOO_LARGE,
+            ErrorType::Empty => StatusCode::BAD_REQUEST,
         }
     }
 }
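
Taken together, these hunks change two things: each HTTP handler now tags an empty input batch as ErrorType::Empty instead of ErrorType::Validation, and the StatusCode conversion maps that new variant to 400 Bad Request instead of the 413 that Validation produces. Below is a minimal sketch of the repeated pattern, using simplified stand-ins for ErrorType/ErrorResponse and an illustrative check_batch helper (not a function in this codebase); the error message string is also illustrative. It assumes the axum and tracing crates.

use axum::http::StatusCode;

enum ErrorType { Overloaded, Tokenizer, Validation, Empty }

struct ErrorResponse { error: String, error_type: ErrorType }

// Hypothetical helper mirroring the check each handler performs on its input.
fn check_batch(batch_len: usize) -> Result<(), ErrorResponse> {
    if batch_len == 0 {
        let message = "`inputs` cannot be empty".to_string(); // illustrative message
        tracing::error!("{message}");
        return Err(ErrorResponse {
            error: message,
            error_type: ErrorType::Empty, // was ErrorType::Validation before this fix
        });
    }
    Ok(())
}

impl From<&ErrorType> for StatusCode {
    fn from(value: &ErrorType) -> Self {
        match value {
            ErrorType::Overloaded => StatusCode::TOO_MANY_REQUESTS,
            ErrorType::Tokenizer => StatusCode::UNPROCESSABLE_ENTITY,
            ErrorType::Validation => StatusCode::PAYLOAD_TOO_LARGE,
            // New: empty batches are reported as 400 Bad Request.
            ErrorType::Empty => StatusCode::BAD_REQUEST,
        }
    }
}

fn main() {
    if let Err(err) = check_batch(0) {
        let status = StatusCode::from(&err.error_type);
        // Prints "400 Bad Request: `inputs` cannot be empty".
        println!("{status}: {}", err.error);
    }
}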

router/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -516,6 +516,7 @@ pub enum ErrorType {
     Overloaded,
     Validation,
     Tokenizer,
+    Empty,
 }
 
 #[derive(Serialize)]
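
Per the updated utoipa examples above, the new variant serializes as "empty" in the error body. A small sketch of that wire shape, assuming serde and serde_json; the rename attribute here is illustrative, since the actual attribute on ErrorType is not shown in this diff.

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "lowercase")] // assumed; produces "empty", "validation", ...
enum ErrorType {
    Overloaded,
    Validation,
    Tokenizer,
    Empty,
}

#[derive(Serialize)]
struct ErrorResponse {
    error: String,
    error_type: ErrorType,
}

fn main() {
    let err = ErrorResponse {
        error: "Batch is empty".to_string(),
        error_type: ErrorType::Empty,
    };
    // Prints {"error":"Batch is empty","error_type":"empty"}, the body that now
    // accompanies a 400 response.
    println!("{}", serde_json::to_string(&err).unwrap());
}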
