28
28
public class GeminiUtils {
29
29
private static final ExecutorService executor = Executors .newSingleThreadExecutor ();
30
30
private static final String BASE_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/" ;
31
- private static final String GEMINI_MODEL = "gemini-2.5-flash-preview-04-17" ;
31
+ private static final String SUMMARIZATION_GEMINI_MODEL = "gemini-2.0-flash" ;
32
+ private static final String TRANSCRIPTION_GEMINI_MODEL = "gemini-2.5-flash-preview-04-17" ;
32
33
private static final String ACTION = ":generateContent?key=" ;
33
34
private static final AtomicReference <Future <?>> currentTask = new AtomicReference <>(null );
34
35
private static final Handler mainThreadHandler = new Handler (Looper .getMainLooper ());
@@ -46,7 +47,7 @@ public static void getVideoSummary(@NonNull String videoUrl, @NonNull String api
46
47
String langName = getLanguageName ();
47
48
String prompt = "Summarize the key points of this video in " + langName + ". Skip any preamble, intro phrases, or explanations — output only the summary." ;
48
49
Logger .printDebug (() -> "GeminiUtils (SUMMARY): Sending Prompt: " + prompt );
49
- generateContent (videoUrl , apiKey , prompt , callback );
50
+ generateContent (videoUrl , apiKey , prompt , SUMMARIZATION_GEMINI_MODEL , callback );
50
51
}
51
52
52
53
/**
@@ -61,7 +62,7 @@ public static void getVideoTranscription(@NonNull String videoUrl, @NonNull Stri
61
62
String langName = getLanguageName ();
62
63
String prompt = "Transcribe this video precisely in " + langName + ", including spoken words, written words in the video and significant sounds. Provide timestamps for each segment in the format [HH:MM:SS.mmm - HH:MM:SS.mmm]: Text. Skip any preamble, intro phrases, or explanations — output only the transcription." ;
63
64
Logger .printDebug (() -> "GeminiUtils (TRANSCRIPTION): Sending Prompt: " + prompt );
64
- generateContent (videoUrl , apiKey , prompt , callback );
65
+ generateContent (videoUrl , apiKey , prompt , TRANSCRIPTION_GEMINI_MODEL , callback );
65
66
}
66
67
67
68
/**
@@ -79,7 +80,7 @@ public static void getVideoTranscription(@NonNull String videoUrl, @NonNull Stri
79
80
* <li>Output only the translated JSON data, without any extra text, explanations, or formatting.</li>
80
81
* </ul>
81
82
* <p>
82
- * It then calls the internal {@link #generateContent(String, String, String, Callback)} method,
83
+ * It then calls the internal {@link #generateContent(String, String, String, String, Callback)} method,
83
84
* passing {@code null} for the video URL, as this operation only involves text processing.
84
85
* Any previously running Gemini task initiated by this utility class will be canceled
85
86
* before this new translation task begins.
@@ -112,7 +113,7 @@ public static void translateYandexJson(
112
113
String prompt = "Translate ONLY the string values associated with the \" text\" keys within the following JSON subtitle data to " + targetLangName + ". Preserve the exact JSON structure, including all keys (like \" startMs\" , \" endMs\" , \" durationMs\" ) and their original numeric values. Output ONLY the fully translated JSON data, without any introductory text, explanations, comments, or markdown formatting (like ```json ... ```).\n \n Input JSON:\n " + yandexJson ;
113
114
114
115
Logger .printDebug (() -> "GeminiUtils (JSON TRANSLATE): Sending Translation Prompt for target '" + targetLangName + "'." );
115
- generateContent (null , apiKey , prompt , callback );
116
+ generateContent (null , apiKey , prompt , TRANSCRIPTION_GEMINI_MODEL , callback );
116
117
}
117
118
118
119
/**
@@ -125,9 +126,10 @@ public static void translateYandexJson(
125
126
* @param videoUrl The publicly accessible URL of the video (nullable).
126
127
* @param apiKey The Gemini API key.
127
128
* @param textPrompt The specific text prompt to send.
129
+ * @param model The Gemini model to use (e.g., "gemini-2.0-flash" or "gemini-2.5-flash-preview-04-17").
128
130
* @param callback The {@link Callback} to handle the success or failure response.
129
131
*/
130
- private static void generateContent (@ Nullable String videoUrl , @ NonNull String apiKey , @ NonNull String textPrompt , @ NonNull Callback callback ) {
132
+ private static void generateContent (@ Nullable String videoUrl , @ NonNull String apiKey , @ NonNull String textPrompt , @ NonNull String model , @ NonNull Callback callback ) {
131
133
cancelCurrentTask ();
132
134
133
135
final AtomicReference <Future <?>> taskRef = new AtomicReference <>();
@@ -136,7 +138,7 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
136
138
Future <?> taskBeingRun = taskRef .get ();
137
139
138
140
try {
139
- URL url = new URL (BASE_API_URL + GEMINI_MODEL + ACTION + apiKey );
141
+ URL url = new URL (BASE_API_URL + model + ACTION + apiKey );
140
142
connection = (HttpURLConnection ) url .openConnection ();
141
143
currentConnection = connection ;
142
144
@@ -183,15 +185,17 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
183
185
.put (safetySetting_danger );
184
186
requestBody .put ("safetySettings" , safetySettingsArray );
185
187
186
- // Generation Config (original thinking config)
187
- JSONObject thinkingConfig = new JSONObject ().put ("thinkingBudget" , 0 );
188
- JSONObject generationConfig = new JSONObject ().put ("thinkingConfig" , thinkingConfig );
189
- requestBody .put ("generationConfig" , generationConfig );
188
+ // Include generationConfig only for models that support thinkingConfig (e.g., transcription model)
189
+ if (model .equals (TRANSCRIPTION_GEMINI_MODEL )) {
190
+ JSONObject thinkingConfig = new JSONObject ().put ("thinkingBudget" , 0 );
191
+ JSONObject generationConfig = new JSONObject ().put ("thinkingConfig" , thinkingConfig );
192
+ requestBody .put ("generationConfig" , generationConfig );
193
+ }
190
194
191
195
String jsonInputString = requestBody .toString ();
192
196
193
197
String logIdentifier = (videoUrl != null ) ? "VIDEO" : "TEXT/JSON" ;
194
- Logger .printDebug (() -> "GeminiUtils (" + logIdentifier + " - " + GEMINI_MODEL + "): Sending Payload. Prompt starts: " + textPrompt .substring (0 , Math .min (textPrompt .length (), 200 )) + "..." );
198
+ Logger .printDebug (() -> "GeminiUtils (" + logIdentifier + " - " + model + "): Sending Payload. Prompt starts: " + textPrompt .substring (0 , Math .min (textPrompt .length (), 200 )) + "..." );
195
199
196
200
try (OutputStream os = connection .getOutputStream ()) {
197
201
byte [] input = jsonInputString .getBytes (StandardCharsets .UTF_8 );
@@ -218,7 +222,6 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
218
222
if (responseCode == HttpURLConnection .HTTP_OK ) {
219
223
JSONObject jsonResponse = new JSONObject (responseString );
220
224
try {
221
- // Check for empty candidates or promptFeedback block
222
225
if (!jsonResponse .has ("candidates" ) || jsonResponse .getJSONArray ("candidates" ).length () == 0 ) {
223
226
String blockReason = extractBlockReason (jsonResponse );
224
227
final String finalBlockReason = blockReason != null ? "Content blocked: " + blockReason :
@@ -228,7 +231,6 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
228
231
return ;
229
232
}
230
233
231
- // Extract text result
232
234
String resultText = jsonResponse .getJSONArray ("candidates" )
233
235
.getJSONObject (0 )
234
236
.getJSONObject ("content" )
@@ -237,10 +239,8 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
237
239
.getString ("text" );
238
240
final String finalResult = resultText .trim ();
239
241
240
- // Log the raw result for debugging, especially for translation
241
242
Logger .printDebug (() -> "Gemini RAW result received: " + finalResult .substring (0 , Math .min (finalResult .length (), 300 )) + "..." );
242
243
243
- // Basic check if the result looks like JSON (for translation task)
244
244
if (videoUrl == null ) {
245
245
boolean looksLikeJson = finalResult .startsWith ("[" ) || finalResult .startsWith ("{" );
246
246
if (!looksLikeJson ) {
@@ -277,7 +277,7 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
277
277
}
278
278
279
279
} catch (java .net .SocketTimeoutException e ) {
280
- Logger .printException (() -> "Gemini API request timed out (" + GEMINI_MODEL + ")" , e );
280
+ Logger .printException (() -> "Gemini API request timed out (" + model + ")" , e );
281
281
final String timeoutMsg = "Request timed out after " + (connection != null ? connection .getReadTimeout () / 1000 : "?" ) + " seconds." ;
282
282
mainThreadHandler .post (() -> callback .onFailure (timeoutMsg ));
283
283
} catch (InterruptedException e ) {
@@ -289,12 +289,12 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
289
289
Logger .printInfo (() -> "Gemini task explicitly cancelled (IOException)." );
290
290
mainThreadHandler .post (() -> callback .onFailure ("Operation cancelled." ));
291
291
} else {
292
- Logger .printException (() -> "Gemini API request IO failed (" + GEMINI_MODEL + ")" , e );
292
+ Logger .printException (() -> "Gemini API request IO failed (" + model + ")" , e );
293
293
final String ioErrorMsg = e .getMessage () != null ? "Network error: " + e .getMessage () : "Unknown network error" ;
294
294
mainThreadHandler .post (() -> callback .onFailure (ioErrorMsg ));
295
295
}
296
296
} catch (Exception e ) {
297
- Logger .printException (() -> "Gemini API request failed (" + GEMINI_MODEL + ")" , e );
297
+ Logger .printException (() -> "Gemini API request failed (" + model + ")" , e );
298
298
final String genericErrorMsg = e .getMessage () != null ? e .getMessage () : "Unknown error during request setup" ;
299
299
mainThreadHandler .post (() -> callback .onFailure (genericErrorMsg ));
300
300
} finally {
0 commit comments