Skip to content

Commit 237673b

Browse files
committed
fix(YouTube - Overlay buttons): Use Gemini 2.0 Flash model for summaries to prevent 'Deadline expired' errors
1 parent 64d29f1 commit 237673b

File tree

1 file changed

+19
-19
lines changed
  • extensions/shared/src/main/java/app/revanced/extension/youtube/utils

1 file changed

+19
-19
lines changed

extensions/shared/src/main/java/app/revanced/extension/youtube/utils/GeminiUtils.java

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
public class GeminiUtils {
2929
private static final ExecutorService executor = Executors.newSingleThreadExecutor();
3030
private static final String BASE_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/";
31-
private static final String GEMINI_MODEL = "gemini-2.5-flash-preview-04-17";
31+
private static final String SUMMARIZATION_GEMINI_MODEL = "gemini-2.0-flash";
32+
private static final String TRANSCRIPTION_GEMINI_MODEL = "gemini-2.5-flash-preview-04-17";
3233
private static final String ACTION = ":generateContent?key=";
3334
private static final AtomicReference<Future<?>> currentTask = new AtomicReference<>(null);
3435
private static final Handler mainThreadHandler = new Handler(Looper.getMainLooper());
@@ -46,7 +47,7 @@ public static void getVideoSummary(@NonNull String videoUrl, @NonNull String api
4647
String langName = getLanguageName();
4748
String prompt = "Summarize the key points of this video in " + langName + ". Skip any preamble, intro phrases, or explanations — output only the summary.";
4849
Logger.printDebug(() -> "GeminiUtils (SUMMARY): Sending Prompt: " + prompt);
49-
generateContent(videoUrl, apiKey, prompt, callback);
50+
generateContent(videoUrl, apiKey, prompt, SUMMARIZATION_GEMINI_MODEL, callback);
5051
}
5152

5253
/**
@@ -61,7 +62,7 @@ public static void getVideoTranscription(@NonNull String videoUrl, @NonNull Stri
6162
String langName = getLanguageName();
6263
String prompt = "Transcribe this video precisely in " + langName + ", including spoken words, written words in the video and significant sounds. Provide timestamps for each segment in the format [HH:MM:SS.mmm - HH:MM:SS.mmm]: Text. Skip any preamble, intro phrases, or explanations — output only the transcription.";
6364
Logger.printDebug(() -> "GeminiUtils (TRANSCRIPTION): Sending Prompt: " + prompt);
64-
generateContent(videoUrl, apiKey, prompt, callback);
65+
generateContent(videoUrl, apiKey, prompt, TRANSCRIPTION_GEMINI_MODEL, callback);
6566
}
6667

6768
/**
@@ -79,7 +80,7 @@ public static void getVideoTranscription(@NonNull String videoUrl, @NonNull Stri
7980
* <li>Output only the translated JSON data, without any extra text, explanations, or formatting.</li>
8081
* </ul>
8182
* <p>
82-
* It then calls the internal {@link #generateContent(String, String, String, Callback)} method,
83+
* It then calls the internal {@link #generateContent(String, String, String, String, Callback)} method,
8384
* passing {@code null} for the video URL, as this operation only involves text processing.
8485
* Any previously running Gemini task initiated by this utility class will be canceled
8586
* before this new translation task begins.
@@ -112,7 +113,7 @@ public static void translateYandexJson(
112113
String prompt = "Translate ONLY the string values associated with the \"text\" keys within the following JSON subtitle data to " + targetLangName + ". Preserve the exact JSON structure, including all keys (like \"startMs\", \"endMs\", \"durationMs\") and their original numeric values. Output ONLY the fully translated JSON data, without any introductory text, explanations, comments, or markdown formatting (like ```json ... ```).\n\nInput JSON:\n" + yandexJson;
113114

114115
Logger.printDebug(() -> "GeminiUtils (JSON TRANSLATE): Sending Translation Prompt for target '" + targetLangName + "'.");
115-
generateContent(null, apiKey, prompt, callback);
116+
generateContent(null, apiKey, prompt, TRANSCRIPTION_GEMINI_MODEL, callback);
116117
}
117118

118119
/**
@@ -125,9 +126,10 @@ public static void translateYandexJson(
125126
* @param videoUrl The publicly accessible URL of the video (nullable).
126127
* @param apiKey The Gemini API key.
127128
* @param textPrompt The specific text prompt to send.
129+
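* @param model The Gemini model identifier, inserted into the request URL between the base API URL and the {@code :generateContent} action (e.g., "gemini-2.0-flash" or "gemini-2.5-flash-preview-04-17").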
128130
* @param callback The {@link Callback} to handle the success or failure response.
129131
*/
130-
private static void generateContent(@Nullable String videoUrl, @NonNull String apiKey, @NonNull String textPrompt, @NonNull Callback callback) {
132+
private static void generateContent(@Nullable String videoUrl, @NonNull String apiKey, @NonNull String textPrompt, @NonNull String model, @NonNull Callback callback) {
131133
cancelCurrentTask();
132134

133135
final AtomicReference<Future<?>> taskRef = new AtomicReference<>();
@@ -136,7 +138,7 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
136138
Future<?> taskBeingRun = taskRef.get();
137139

138140
try {
139-
URL url = new URL(BASE_API_URL + GEMINI_MODEL + ACTION + apiKey);
141+
URL url = new URL(BASE_API_URL + model + ACTION + apiKey);
140142
connection = (HttpURLConnection) url.openConnection();
141143
currentConnection = connection;
142144

@@ -183,15 +185,17 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
183185
.put(safetySetting_danger);
184186
requestBody.put("safetySettings", safetySettingsArray);
185187

186-
// Generation Config (original thinking config)
187-
JSONObject thinkingConfig = new JSONObject().put("thinkingBudget", 0);
188-
JSONObject generationConfig = new JSONObject().put("thinkingConfig", thinkingConfig);
189-
requestBody.put("generationConfig", generationConfig);
188+
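// Attach generationConfig (thinkingConfig with thinkingBudget = 0) only when the requested model is
// TRANSCRIPTION_GEMINI_MODEL, which is used for both transcription and JSON translation requests.
// Requests for any other model (e.g., the summarization model) are sent without a generationConfig.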
189+
if (model.equals(TRANSCRIPTION_GEMINI_MODEL)) {
190+
JSONObject thinkingConfig = new JSONObject().put("thinkingBudget", 0);
191+
JSONObject generationConfig = new JSONObject().put("thinkingConfig", thinkingConfig);
192+
requestBody.put("generationConfig", generationConfig);
193+
}
190194

191195
String jsonInputString = requestBody.toString();
192196

193197
String logIdentifier = (videoUrl != null) ? "VIDEO" : "TEXT/JSON";
194-
Logger.printDebug(() -> "GeminiUtils (" + logIdentifier + " - " + GEMINI_MODEL + "): Sending Payload. Prompt starts: " + textPrompt.substring(0, Math.min(textPrompt.length(), 200)) + "...");
198+
Logger.printDebug(() -> "GeminiUtils (" + logIdentifier + " - " + model + "): Sending Payload. Prompt starts: " + textPrompt.substring(0, Math.min(textPrompt.length(), 200)) + "...");
195199

196200
try (OutputStream os = connection.getOutputStream()) {
197201
byte[] input = jsonInputString.getBytes(StandardCharsets.UTF_8);
@@ -218,7 +222,6 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
218222
if (responseCode == HttpURLConnection.HTTP_OK) {
219223
JSONObject jsonResponse = new JSONObject(responseString);
220224
try {
221-
// Check for empty candidates or promptFeedback block
222225
if (!jsonResponse.has("candidates") || jsonResponse.getJSONArray("candidates").length() == 0) {
223226
String blockReason = extractBlockReason(jsonResponse);
224227
final String finalBlockReason = blockReason != null ? "Content blocked: " + blockReason :
@@ -228,7 +231,6 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
228231
return;
229232
}
230233

231-
// Extract text result
232234
String resultText = jsonResponse.getJSONArray("candidates")
233235
.getJSONObject(0)
234236
.getJSONObject("content")
@@ -237,10 +239,8 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
237239
.getString("text");
238240
final String finalResult = resultText.trim();
239241

240-
// Log the raw result for debugging, especially for translation
241242
Logger.printDebug(() -> "Gemini RAW result received: " + finalResult.substring(0, Math.min(finalResult.length(), 300)) + "...");
242243

243-
// Basic check if the result looks like JSON (for translation task)
244244
if (videoUrl == null) {
245245
boolean looksLikeJson = finalResult.startsWith("[") || finalResult.startsWith("{");
246246
if (!looksLikeJson) {
@@ -277,7 +277,7 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
277277
}
278278

279279
} catch (java.net.SocketTimeoutException e) {
280-
Logger.printException(() -> "Gemini API request timed out (" + GEMINI_MODEL + ")", e);
280+
Logger.printException(() -> "Gemini API request timed out (" + model + ")", e);
281281
final String timeoutMsg = "Request timed out after " + (connection != null ? connection.getReadTimeout() / 1000 : "?") + " seconds.";
282282
mainThreadHandler.post(() -> callback.onFailure(timeoutMsg));
283283
} catch (InterruptedException e) {
@@ -289,12 +289,12 @@ private static void generateContent(@Nullable String videoUrl, @NonNull String a
289289
Logger.printInfo(() -> "Gemini task explicitly cancelled (IOException).");
290290
mainThreadHandler.post(() -> callback.onFailure("Operation cancelled."));
291291
} else {
292-
Logger.printException(() -> "Gemini API request IO failed (" + GEMINI_MODEL + ")", e);
292+
Logger.printException(() -> "Gemini API request IO failed (" + model + ")", e);
293293
final String ioErrorMsg = e.getMessage() != null ? "Network error: " + e.getMessage() : "Unknown network error";
294294
mainThreadHandler.post(() -> callback.onFailure(ioErrorMsg));
295295
}
296296
} catch (Exception e) {
297-
Logger.printException(() -> "Gemini API request failed (" + GEMINI_MODEL + ")", e);
297+
Logger.printException(() -> "Gemini API request failed (" + model + ")", e);
298298
final String genericErrorMsg = e.getMessage() != null ? e.getMessage() : "Unknown error during request setup";
299299
mainThreadHandler.post(() -> callback.onFailure(genericErrorMsg));
300300
} finally {

0 commit comments

Comments
 (0)