Skip to content

Commit e268f94

Browse files
committed
feat: enable tdrz
Enables tinydiarize models ggml-org/whisper.cpp#1058
1 parent d07a114 commit e268f94

File tree

6 files changed

+12
-1
lines changed

6 files changed

+12
-1
lines changed

android/src/main/java/com/rnwhisper/WhisperContext.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ private int full(int jobId, ReadableMap options, float[] audioData, int audioDat
474474
options.hasKey("speedUp") ? options.getBoolean("speedUp") : false,
475475
// jboolean translate,
476476
options.hasKey("translate") ? options.getBoolean("translate") : false,
477+
// jboolean tdrz_enable,
478+
options.hasKey("tdrzEnable") ? options.getBoolean("tdrzEnable") : false,
477479
// jstring language,
478480
options.hasKey("language") ? options.getString("language") : "auto",
479481
// jstring prompt
@@ -645,6 +647,7 @@ protected static native int fullTranscribe(
645647
int best_of,
646648
boolean speed_up,
647649
boolean translate,
650+
boolean tdrz_enable,
648651
String language,
649652
String prompt,
650653
ProgressCallback progressCallback

android/src/main/jni.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
232232
jint best_of,
233233
jboolean speed_up,
234234
jboolean translate,
235+
jboolean tdrz_enable,
235236
jstring language,
236237
jstring prompt,
237238
jobject progress_callback_instance
@@ -256,7 +257,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
256257
params.print_realtime = false;
257258
params.print_progress = false;
258259
params.print_timestamps = false;
259-
params.print_special = false;
260+
params.print_special = true;
260261
params.translate = translate;
261262
const char *language_chars = env->GetStringUTFChars(language, nullptr);
262263
params.language = language_chars;
@@ -265,6 +266,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
265266
params.offset_ms = 0;
266267
params.no_context = true;
267268
params.single_segment = false;
269+
params.tdrz_enable = tdrz_enable;
268270

269271
if (max_len > -1) {
270272
params.max_len = max_len;

cpp/whisper.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3727,6 +3727,7 @@ static void whisper_process_logits(
37273727
// [TDRZ] when tinydiarize is disabled, suppress solm token
37283728
if (params.tdrz_enable == false) {
37293729
logits[vocab.token_solm] = -INFINITY;
3730+
log("[TDRZ] solm token suppressed\n");
37303731
}
37313732

37323733
// suppress task tokens
@@ -4719,6 +4720,7 @@ int whisper_full_with_state(
47194720

47204721
// [TDRZ] record if speaker turn was predicted after current segment
47214722
if (params.tdrz_enable && tokens_cur[i].id == whisper_token_solm(ctx)) {
4723+
log("trdz status=%s\nSpeaker turn happened", params.tdrz_enable ? "enabled" : "disabled");
47224724
speaker_turn_next = true;
47234725
}
47244726

docs/API/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ ___
8282
| `prompt?` | `string` | Initial Prompt |
8383
| `speedUp?` | `boolean` | Speed up audio by x2 (reduced accuracy) |
8484
| `temperature?` | `number` | Initial decoding temperature |
85+
| `tdrzEnable?` | `boolean` | Enable tinydiarize https://github.com/ggerganov/whisper.cpp/pull/1058 |
8586
| `temperatureInc?` | `number` | - |
8687
| `tokenTimestamps?` | `boolean` | Enable token-level timestamps |
8788
| `translate?` | `boolean` | Translate from source language to English (Default: false) |

ios/RNWhisperContext.mm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ - (struct whisper_full_params)getParams:(NSDictionary *)options jobId:(int)jobId
381381
params.print_special = false;
382382
params.speed_up = options[@"speedUp"] != nil ? [options[@"speedUp"] boolValue] : false;
383383
params.translate = options[@"translate"] != nil ? [options[@"translate"] boolValue] : false;
384+
params.tdrz_enable = options[@"tdrzEnable"] != nil ? [options[@"tdrzEnable"] boolValue] : false;
384385
params.language = options[@"language"] != nil ? [options[@"language"] UTF8String] : "auto";
385386
params.n_threads = n_threads > 0 ? n_threads : default_n_threads;
386387
params.offset_ms = 0;

src/NativeRNWhisper.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ export type TranscribeOptions = {
3030
bestOf?: number,
3131
/** Speed up audio by x2 (reduced accuracy) */
3232
speedUp?: boolean,
33+
/** Enable tinydiarize (https://github.com/ggerganov/whisper.cpp/pull/1058) */
34+
tdrzEnable?: boolean,
3335
/** Initial Prompt */
3436
prompt?: string,
3537
}

0 commit comments

Comments
 (0)