Skip to content

Commit 8962422

Browse files
authored
llama-bench : add JSONL (NDJSON) output mode (ggml-org#9288)
* llama-bench : add JSONL (NDJSON) output mode
* llama-bench : update usage docs
1 parent b69a480 commit 8962422

File tree

2 files changed

+124
-78
lines changed

2 files changed

+124
-78
lines changed

examples/llama-bench/README.md

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ Performance testing tool for llama.cpp.
1414
1. [Markdown](#markdown)
1515
2. [CSV](#csv)
1616
3. [JSON](#json)
17-
4. [SQL](#sql)
17+
4. [JSONL](#jsonl)
18+
5. [SQL](#sql)
1819

1920
## Syntax
2021

@@ -23,27 +24,34 @@ usage: ./llama-bench [options]
2324
2425
options:
2526
-h, --help
26-
-m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
27-
-p, --n-prompt <n> (default: 512)
28-
-n, --n-gen <n> (default: 128)
29-
-pg <pp,tg> (default: 512,128)
30-
-b, --batch-size <n> (default: 2048)
31-
-ub, --ubatch-size <n> (default: 512)
32-
-ctk, --cache-type-k <t> (default: f16)
33-
-ctv, --cache-type-v <t> (default: f16)
34-
-t, --threads <n> (default: 16)
35-
-ngl, --n-gpu-layers <n> (default: 99)
36-
-sm, --split-mode <none|layer|row> (default: layer)
37-
-mg, --main-gpu <i> (default: 0)
38-
-nkvo, --no-kv-offload <0|1> (default: 0)
39-
-fa, --flash-attn <0|1> (default: 0)
40-
-mmp, --mmap <0|1> (default: 1)
41-
--numa <distribute|isolate|numactl> (default: disabled)
42-
-embd, --embeddings <0|1> (default: 0)
43-
-ts, --tensor-split <ts0/ts1/..> (default: 0)
44-
-r, --repetitions <n> (default: 5)
45-
-o, --output <csv|json|md|sql> (default: md)
46-
-v, --verbose (default: 0)
27+
-m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
28+
-p, --n-prompt <n> (default: 512)
29+
-n, --n-gen <n> (default: 128)
30+
-pg <pp,tg> (default: )
31+
-b, --batch-size <n> (default: 2048)
32+
-ub, --ubatch-size <n> (default: 512)
33+
-ctk, --cache-type-k <t> (default: f16)
34+
-ctv, --cache-type-v <t> (default: f16)
35+
-t, --threads <n> (default: 8)
36+
-C, --cpu-mask <hex,hex> (default: 0x0)
37+
--cpu-strict <0|1> (default: 0)
38+
--poll <0...100> (default: 50)
39+
-ngl, --n-gpu-layers <n> (default: 99)
40+
-rpc, --rpc <rpc_servers> (default: )
41+
-sm, --split-mode <none|layer|row> (default: layer)
42+
-mg, --main-gpu <i> (default: 0)
43+
-nkvo, --no-kv-offload <0|1> (default: 0)
44+
-fa, --flash-attn <0|1> (default: 0)
45+
-mmp, --mmap <0|1> (default: 1)
46+
--numa <distribute|isolate|numactl> (default: disabled)
47+
-embd, --embeddings <0|1> (default: 0)
48+
-ts, --tensor-split <ts0/ts1/..> (default: 0)
49+
-r, --repetitions <n> (default: 5)
50+
--prio <0|1|2|3> (default: 0)
51+
--delay <0...N> (seconds) (default: 0)
52+
-o, --output <csv|json|jsonl|md|sql> (default: md)
53+
-oe, --output-err <csv|json|jsonl|md|sql> (default: none)
54+
-v, --verbose (default: 0)
4755
4856
Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
4957
```
@@ -238,6 +246,19 @@ $ ./llama-bench -o json
238246
]
239247
```
240248

249+
250+
### JSONL
251+
252+
```sh
253+
$ ./llama-bench -o jsonl
254+
```
255+
256+
```json lines
257+
{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":512,"n_gen":0,"test_time":"2023-09-23T12:09:57Z","avg_ns":212365953,"stddev_ns":985423,"avg_ts":2410.974041,"stddev_ts":11.163766,"samples_ns":[213837238,211635853,212328053,211329715,212698907],"samples_ts":[2394.34,2419.25,2411.36,2422.75,2407.16]}
258+
{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":0,"n_gen":128,"test_time":"2023-09-23T12:09:59Z","avg_ns":977425219,"stddev_ns":9268593,"avg_ts":130.965708,"stddev_ts":1.238924,"samples_ns":[984472709,974901233,989474741,970729355,967548060],"samples_ts":[130.019,131.295,129.362,131.86,132.293]}
259+
```
260+
261+
241262
### SQL
242263

243264
SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database.

examples/llama-bench/llama-bench.cpp

Lines changed: 81 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,14 @@ static std::string get_gpu_info() {
171171
}
172172

173173
// command line params
174-
enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
174+
// Result output formats accepted by the -o / -oe command line options.
// Enumerator order is significant only in that it fixes the numeric values
// (NONE = 0 ... SQL = 5); keep new formats consistent with output_format_str().
enum output_formats {
    NONE,
    CSV,
    JSON,
    JSONL,
    MARKDOWN,
    SQL,
};
175175

176176
static const char * output_format_str(output_formats format) {
177177
switch (format) {
178178
case NONE: return "none";
179179
case CSV: return "csv";
180180
case JSON: return "json";
181+
case JSONL: return "jsonl";
181182
case MARKDOWN: return "md";
182183
case SQL: return "sql";
183184
default: GGML_ABORT("invalid output format");
@@ -191,6 +192,8 @@ static bool output_format_from_str(const std::string & s, output_formats & forma
191192
format = CSV;
192193
} else if (s == "json") {
193194
format = JSON;
195+
} else if (s == "jsonl") {
196+
format = JSONL;
194197
} else if (s == "md") {
195198
format = MARKDOWN;
196199
} else if (s == "sql") {
@@ -283,34 +286,34 @@ static void print_usage(int /* argc */, char ** argv) {
283286
printf("\n");
284287
printf("options:\n");
285288
printf(" -h, --help\n");
286-
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
287-
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
288-
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
289-
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
290-
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
291-
printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
292-
printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
293-
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
294-
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
295-
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str());
296-
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
297-
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
298-
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
299-
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
300-
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
301-
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
302-
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
303-
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
304-
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
305-
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
306-
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
307-
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
308-
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
309-
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
310-
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
311-
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
312-
printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
313-
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
289+
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
290+
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
291+
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
292+
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
293+
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
294+
printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
295+
printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
296+
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
297+
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
298+
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str());
299+
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
300+
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
301+
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
302+
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
303+
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
304+
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
305+
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
306+
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
307+
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
308+
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
309+
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
310+
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
311+
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
312+
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
313+
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
314+
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
315+
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
316+
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
314317
printf("\n");
315318
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
316319
}
@@ -1074,37 +1077,38 @@ struct csv_printer : public printer {
10741077
}
10751078
};
10761079

1077-
struct json_printer : public printer {
1078-
bool first = true;
10791080

1080-
static std::string escape_json(const std::string & value) {
1081-
std::string escaped;
1082-
for (auto c : value) {
1083-
if (c == '"') {
1084-
escaped += "\\\"";
1085-
} else if (c == '\\') {
1086-
escaped += "\\\\";
1087-
} else if (c <= 0x1f) {
1088-
char buf[8];
1089-
snprintf(buf, sizeof(buf), "\\u%04x", c);
1090-
escaped += buf;
1091-
} else {
1092-
escaped += c;
1093-
}
1081+
// Escape a string so that it can be placed between the quotes of a JSON string literal.
//
// - '"' and '\' are backslash-escaped
// - ASCII control characters (0x00-0x1f) are written as \u00XX
// - every other byte, including the bytes of multi-byte UTF-8 sequences, is copied unchanged
//
// Returns the escaped copy; the input is not modified.
static std::string escape_json(const std::string & value) {
    std::string escaped;
    escaped.reserve(value.size());
    for (const char ch : value) {
        // Classify the byte as unsigned. Where plain char is signed, bytes >= 0x80 are negative,
        // so a signed comparison against 0x1f would treat them as control characters and the
        // "%04x" conversion of the sign-extended value would be truncated to "\uffffff".
        const unsigned char c = static_cast<unsigned char>(ch);
        if (c == '"') {
            escaped += "\\\"";
        } else if (c == '\\') {
            escaped += "\\\\";
        } else if (c <= 0x1f) {
            char buf[8]; // "\uXXXX" is 6 characters plus the terminating NUL
            snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned>(c));
            escaped += buf;
        } else {
            escaped += ch;
        }
    }
    return escaped;
}
10971098

1098-
static std::string format_value(const std::string & field, const std::string & value) {
1099-
switch (test::get_field_type(field)) {
1100-
case test::STRING:
1101-
return "\"" + escape_json(value) + "\"";
1102-
case test::BOOL:
1103-
return value == "0" ? "false" : "true";
1104-
default:
1105-
return value;
1106-
}
1099+
static std::string format_json_value(const std::string & field, const std::string & value) {
1100+
switch (test::get_field_type(field)) {
1101+
case test::STRING:
1102+
return "\"" + escape_json(value) + "\"";
1103+
case test::BOOL:
1104+
return value == "0" ? "false" : "true";
1105+
default:
1106+
return value;
11071107
}
1108+
}
1109+
1110+
struct json_printer : public printer {
1111+
bool first = true;
11081112

11091113
void print_header(const cmd_params & params) override {
11101114
fprintf(fout, "[\n");
@@ -1114,7 +1118,7 @@ struct json_printer : public printer {
11141118
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
11151119
assert(fields.size() == values.size());
11161120
for (size_t i = 0; i < fields.size(); i++) {
1117-
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
1121+
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
11181122
}
11191123
}
11201124

@@ -1137,6 +1141,25 @@ struct json_printer : public printer {
11371141
}
11381142
};
11391143

1144+
1145+
struct jsonl_printer : public printer {
1146+
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
1147+
assert(fields.size() == values.size());
1148+
for (size_t i = 0; i < fields.size(); i++) {
1149+
fprintf(fout, "\"%s\": %s, ", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
1150+
}
1151+
}
1152+
1153+
void print_test(const test & t) override {
1154+
fprintf(fout, "{");
1155+
print_fields(test::get_fields(), t.get_values());
1156+
fprintf(fout, "\"samples_ns\": [ %s ],", join(t.samples_ns, ", ").c_str());
1157+
fprintf(fout, "\"samples_ts\": [ %s ]", join(t.get_ts(), ", ").c_str());
1158+
fprintf(fout, "}\n");
1159+
fflush(fout);
1160+
}
1161+
};
1162+
11401163
struct markdown_printer : public printer {
11411164
std::vector<std::string> fields;
11421165

@@ -1437,6 +1460,8 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
14371460
return std::unique_ptr<printer>(new csv_printer());
14381461
case JSON:
14391462
return std::unique_ptr<printer>(new json_printer());
1463+
case JSONL:
1464+
return std::unique_ptr<printer>(new jsonl_printer());
14401465
case MARKDOWN:
14411466
return std::unique_ptr<printer>(new markdown_printer());
14421467
case SQL:

0 commit comments

Comments
 (0)