Skip to content

Commit 194b3b1

Browse files
committed
feat: use llama.cpp ggml-org/llama.cpp#9639
1 parent f87b2e5 commit 194b3b1

14 files changed, with 491 additions and 137 deletions.

example/lib/main.dart

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -480,7 +480,7 @@ class _MyAppState extends State<MyApp> {
480480
if (_tool != null)
481481
Tool(
482482
name: _tool!.name,
483-
jsonSchema: jsonEncode(_tool!.parametersAsString),
483+
jsonSchema: _tool!.parametersAsString,
484484
),
485485
],
486486
maxTokens: _maxTokens.round(),
@@ -516,11 +516,11 @@ class _MyAppState extends State<MyApp> {
516516
print(
517517
'Download progress: $downloadProgress, Load progress: $loadProgress');
518518
});
519-
}, (response, done) {
519+
}, (response, responseJson, done) {
520520
setState(() {
521521
_mlcDownloadProgress = null;
522522
_mlcLoadProgress = null;
523-
latestResult = response;
523+
latestResult = responseJson;
524524
if (done) {
525525
_runningRequestId = null;
526526
}
@@ -549,7 +549,7 @@ class _MyAppState extends State<MyApp> {
549549

550550
_inferenceStartTime = DateTime.now();
551551

552-
int requestId = await fllamaChat(request, (response, done) {
552+
int requestId = await fllamaChat(request, (response, responseJson, done) {
553553
setState(() {
554554
latestResult = response;
555555
fllamaTokenize(FllamaTokenizeRequest(

example/macos/Podfile.lock

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ EXTERNAL SOURCES:
2626

2727
SPEC CHECKSUMS:
2828
file_selector_macos: 585232b688707857504f9cb5f985a7c97fe4dd30
29-
fllama: 70eecc4bce57bde96fc985659224585564a2bfa2
29+
fllama: 6136327ecf6807fee16e195977b999e40b7c3a2d
3030
FlutterMacOS: 8f6f14fa908a6fb3fba0cd85dbd81ec4b251fb24
3131
shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
3232

example/pubspec.lock

Lines changed: 35 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -5,42 +5,42 @@ packages:
55
dependency: transitive
66
description:
77
name: async
8-
sha256: "947bfcf187f74dbc5e146c9eb9c0f10c9f8b30743e341481c1e2ed3ecc18c20c"
8+
sha256: d2872f9c19731c2e5f10444b14686eb7cc85c76274bd6c16e1816bff9a3bab63
99
url: "https://pub.dev"
1010
source: hosted
11-
version: "2.11.0"
11+
version: "2.12.0"
1212
boolean_selector:
1313
dependency: transitive
1414
description:
1515
name: boolean_selector
16-
sha256: "6cfb5af12253eaf2b368f07bacc5a80d1301a071c73360d746b7f2e32d762c66"
16+
sha256: "8aab1771e1243a5063b8b0ff68042d67334e3feab9e95b9490f9a6ebf73b42ea"
1717
url: "https://pub.dev"
1818
source: hosted
19-
version: "2.1.1"
19+
version: "2.1.2"
2020
characters:
2121
dependency: transitive
2222
description:
2323
name: characters
24-
sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
24+
sha256: f71061c654a3380576a52b451dd5532377954cf9dbd272a78fc8479606670803
2525
url: "https://pub.dev"
2626
source: hosted
27-
version: "1.3.0"
27+
version: "1.4.0"
2828
clock:
2929
dependency: transitive
3030
description:
3131
name: clock
32-
sha256: cb6d7f03e1de671e34607e909a7213e31d7752be4fb66a86d29fe1eb14bfb5cf
32+
sha256: fddb70d9b5277016c77a80201021d40a2247104d9f4aa7bab7157b7e3f05b84b
3333
url: "https://pub.dev"
3434
source: hosted
35-
version: "1.1.1"
35+
version: "1.1.2"
3636
collection:
3737
dependency: transitive
3838
description:
3939
name: collection
40-
sha256: a1ace0a119f20aabc852d165077c036cd864315bd99b7eaa10a60100341941bf
40+
sha256: "2f5709ae4d3d59dd8f7cd309b4e023046b57d8a6c82130785d2b0e5868084e76"
4141
url: "https://pub.dev"
4242
source: hosted
43-
version: "1.19.0"
43+
version: "1.19.1"
4444
cross_file:
4545
dependency: transitive
4646
description:
@@ -69,10 +69,10 @@ packages:
6969
dependency: transitive
7070
description:
7171
name: fake_async
72-
sha256: "511392330127add0b769b75a987850d136345d9227c6b94c96a04cf4a391bf78"
72+
sha256: "6a95e56b2449df2273fd8c45a662d6947ce1ebb7aafe80e550a3f68297f3cacc"
7373
url: "https://pub.dev"
7474
source: hosted
75-
version: "1.3.1"
75+
version: "1.3.2"
7676
ffi:
7777
dependency: transitive
7878
description:
@@ -244,18 +244,18 @@ packages:
244244
dependency: transitive
245245
description:
246246
name: leak_tracker
247-
sha256: "7bb2830ebd849694d1ec25bf1f44582d6ac531a57a365a803a6034ff751d2d06"
247+
sha256: c35baad643ba394b40aac41080300150a4f08fd0fd6a10378f8f7c6bc161acec
248248
url: "https://pub.dev"
249249
source: hosted
250-
version: "10.0.7"
250+
version: "10.0.8"
251251
leak_tracker_flutter_testing:
252252
dependency: transitive
253253
description:
254254
name: leak_tracker_flutter_testing
255-
sha256: "9491a714cca3667b60b5c420da8217e6de0d1ba7a5ec322fab01758f6998f379"
255+
sha256: f8b613e7e6a13ec79cfdc0e97638fddb3ab848452eff057653abd3edba760573
256256
url: "https://pub.dev"
257257
source: hosted
258-
version: "3.0.8"
258+
version: "3.0.9"
259259
leak_tracker_testing:
260260
dependency: transitive
261261
description:
@@ -276,10 +276,10 @@ packages:
276276
dependency: transitive
277277
description:
278278
name: matcher
279-
sha256: d2323aa2060500f906aa31a895b4030b6da3ebdcc5619d14ce1aada65cd161cb
279+
sha256: dc58c723c3c24bf8d3e2d3ad3f2f9d7bd9cf43ec6feaa64181775e60190153f2
280280
url: "https://pub.dev"
281281
source: hosted
282-
version: "0.12.16+1"
282+
version: "0.12.17"
283283
material_color_utilities:
284284
dependency: transitive
285285
description:
@@ -300,10 +300,10 @@ packages:
300300
dependency: transitive
301301
description:
302302
name: path
303-
sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
303+
sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5"
304304
url: "https://pub.dev"
305305
source: hosted
306-
version: "1.9.0"
306+
version: "1.9.1"
307307
path_provider_linux:
308308
dependency: transitive
309309
description:
@@ -409,10 +409,10 @@ packages:
409409
dependency: transitive
410410
description:
411411
name: source_span
412-
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
412+
sha256: "254ee5351d6cb365c859e20ee823c3bb479bf4a293c22d17a9f1bf144ce86f7c"
413413
url: "https://pub.dev"
414414
source: hosted
415-
version: "1.10.0"
415+
version: "1.10.1"
416416
sprintf:
417417
dependency: transitive
418418
description:
@@ -425,42 +425,42 @@ packages:
425425
dependency: transitive
426426
description:
427427
name: stack_trace
428-
sha256: "9f47fd3630d76be3ab26f0ee06d213679aa425996925ff3feffdec504931c377"
428+
sha256: "8b27215b45d22309b5cddda1aa2b19bdfec9df0e765f2de506401c071d38d1b1"
429429
url: "https://pub.dev"
430430
source: hosted
431-
version: "1.12.0"
431+
version: "1.12.1"
432432
stream_channel:
433433
dependency: transitive
434434
description:
435435
name: stream_channel
436-
sha256: ba2aa5d8cc609d96bbb2899c28934f9e1af5cddbd60a827822ea467161eb54e7
436+
sha256: "969e04c80b8bcdf826f8f16579c7b14d780458bd97f56d107d3950fdbeef059d"
437437
url: "https://pub.dev"
438438
source: hosted
439-
version: "2.1.2"
439+
version: "2.1.4"
440440
string_scanner:
441441
dependency: transitive
442442
description:
443443
name: string_scanner
444-
sha256: "688af5ed3402a4bde5b3a6c15fd768dbf2621a614950b17f04626c431ab3c4c3"
444+
sha256: "921cd31725b72fe181906c6a94d987c78e3b98c2e205b397ea399d4054872b43"
445445
url: "https://pub.dev"
446446
source: hosted
447-
version: "1.3.0"
447+
version: "1.4.1"
448448
term_glyph:
449449
dependency: transitive
450450
description:
451451
name: term_glyph
452-
sha256: a29248a84fbb7c79282b40b8c72a1209db169a2e0542bce341da992fe1bc7e84
452+
sha256: "7f554798625ea768a7518313e58f83891c7f5024f88e46e7182a4558850a4b8e"
453453
url: "https://pub.dev"
454454
source: hosted
455-
version: "1.2.1"
455+
version: "1.2.2"
456456
test_api:
457457
dependency: transitive
458458
description:
459459
name: test_api
460-
sha256: "664d3a9a64782fcdeb83ce9c6b39e78fd2971d4e37827b9b06c3aa1edc5e760c"
460+
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
461461
url: "https://pub.dev"
462462
source: hosted
463-
version: "0.7.3"
463+
version: "0.7.4"
464464
textwrap:
465465
dependency: transitive
466466
description:
@@ -497,10 +497,10 @@ packages:
497497
dependency: transitive
498498
description:
499499
name: vm_service
500-
sha256: f6be3ed8bd01289b34d679c2b62226f63c0e69f9fd2e50a6b3c1c729a961041b
500+
sha256: "0968250880a6c5fe7edc067ed0a13d4bae1577fe2771dcf3010d52c4a9d3ca14"
501501
url: "https://pub.dev"
502502
source: hosted
503-
version: "14.3.0"
503+
version: "14.3.1"
504504
web:
505505
dependency: transitive
506506
description:
@@ -526,5 +526,5 @@ packages:
526526
source: hosted
527527
version: "1.0.4"
528528
sdks:
529-
dart: ">=3.4.0 <4.0.0"
529+
dart: ">=3.7.0-0 <4.0.0"
530530
flutter: ">=3.19.0"

lib/fllama_io.dart

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import 'package:fllama/io/fllama_bindings_generated.dart';
1010
import 'package:fllama/io/fllama_io_helpers.dart';
1111
import 'package:fllama/misc/openai.dart';
1212

13-
typedef FllamaInferenceCallback = void Function(String response, bool done);
13+
typedef FllamaInferenceCallback = void Function(String response, String openaiResponseJsonString, bool done);
1414
typedef FllamaMlcLoadCallback = void Function(
1515
double downloadProgress, double loadProgress);
1616

lib/fllama_unimplemented.dart

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ import 'package:fllama/fllama.dart';
55
import 'package:fllama/fllama_io.dart';
66
import 'package:fllama/io/fllama_io_helpers.dart';
77

8-
typedef FllamaInferenceCallback = void Function(String response, bool done);
8+
typedef FllamaInferenceCallback = void Function(String response, String openaiResponseJsonString, bool done);
99
typedef FllamaMlcLoadCallback = void Function(
1010
double downloadProgress, double loadProgress);
1111

lib/fllama_universal.dart

Lines changed: 19 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ class FllamaInferenceRequest {
4141
int maxTokens;
4242
String modelPath;
4343
String? modelMmprojPath;
44+
String? openAiRequestJsonString;
4445
int numGpuLayers;
4546

4647
/// Number of threads to use for inference.
@@ -84,6 +85,7 @@ class FllamaInferenceRequest {
8485
this.modelMmprojPath,
8586
this.numThreads = 2,
8687
this.logger,
88+
this.openAiRequestJsonString,
8789
});
8890
}
8991

@@ -129,23 +131,6 @@ Future<int> fllamaChat(
129131
request: request,
130132
);
131133

132-
final String grammar;
133-
if (request.tools.isNotEmpty) {
134-
if (request.tools.length > 1) {
135-
// ignore: avoid_print
136-
print(
137-
'[fllama] WARNING: More than one tool was specified. No grammar will be enforced. (via fllamaChat)');
138-
grammar = '';
139-
} else {
140-
grammar = request.tools.first.grammar;
141-
// ignore: avoid_print
142-
print('[fllama] Grammar to be enforced: $grammar');
143-
}
144-
} else {
145-
// ignore: avoid_print
146-
print('[fllama] No tools were specified. No grammar will be enforced.');
147-
grammar = '';
148-
}
149134
final inferenceRequest = FllamaInferenceRequest(
150135
contextSize: request.contextSize,
151136
input: text,
@@ -157,10 +142,12 @@ Future<int> fllamaChat(
157142
penaltyRepeat: request.presencePenalty,
158143
temperature: request.temperature,
159144
topP: request.topP,
160-
grammar: grammar,
145+
grammar: '', // deprecated, llama.cpp handles tools internally now
161146
logger: request.logger,
162147
eosToken: eosToken,
148+
openAiRequestJsonString: request.toJsonString(),
163149
);
150+
164151
return fllamaInference(inferenceRequest, callback);
165152
}
166153

@@ -185,20 +172,20 @@ String fllamaApplyChatTemplate({
185172
}
186173

187174
if (request.tools.isNotEmpty) {
188-
final tools = request.tools.map((tool) {
189-
return tool.typescriptDefinition;
190-
}).join('\n\n');
191-
jsonMessages.insert(0, {
192-
'role': 'system',
193-
'content': '''
194-
You have access to the following functions:
195-
$tools
175+
// final tools = request.tools.map((tool) {
176+
// return tool.typescriptDefinition;
177+
// }).join('\n\n');
178+
// jsonMessages.insert(0, {
179+
// 'role': 'system',
180+
// 'content': '''
181+
// You have access to the following functions:
182+
// $tools
196183

197-
You are a helpful assistant with tool calling capabilities.
198-
When you receive a tool call response, use the output to format an answer to the orginal use question.
199-
If you are using tools, respond in the format {"name": function name, "parameters": dictionary of function arguments}. If multiple tools are used, use array format.
200-
''',
201-
});
184+
// You are a helpful assistant with tool calling capabilities.
185+
// When you receive a tool call response, use the output to format an answer to the orginal use question.
186+
// If you are using tools, respond in the format {"name": function name, "parameters": dictionary of function arguments}. If multiple tools are used, use array format.
187+
// ''',
188+
// });
202189
}
203190

204191
if (jsonMessages.isEmpty) {
@@ -261,22 +248,18 @@ If you are using tools, respond in the format {"name": function name, "parameter
261248
if (chatTemplate != chatMlTemplate) {
262249
final llamaChatTemplate = Llama3ChatTemplate();
263250
// ignore: avoid_print
264-
265251
if (llamaChatTemplate.matches(chatTemplate)) {
266252
// ex. bartowski's llama 3.2 8B cannot be parsed, but it is
267253
// desirable to use. ChatML as a fallback breaks it. First response
268254
// generally works, then it fails.
269-
print(
270-
'[fllama] Using Llama 3 chat template as a fallback because the chat template could not be applied. Exception: $e. Chat template: $chatTemplate. Messages: $jsonMessages.');
255+
271256
return fllamaApplyChatTemplate(
272257
chatTemplate: llamaChatTemplate.template,
273258
request: request,
274259
bosToken: llamaChatTemplate.bosToken,
275260
eosToken: llamaChatTemplate.eosToken,
276261
);
277262
}
278-
print(
279-
'[fllama] Using ChatML because the chat template could not be applied. Exception: $e. Chat template: $chatTemplate. Messages: $jsonMessages.');
280263
return fllamaApplyChatTemplate(
281264
chatTemplate: chatMlTemplate,
282265
request: request,

0 commit comments

Comments
 (0)