cafferychen777
diff --git a/‎R/DESCRIPTION
Lines changed: 1 addition & 1 deletion b/‎R/DESCRIPTION
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/NEWS.md
Lines changed: 18 additions & 0 deletions b/‎R/NEWS.md
Lines changed: 18 additions & 0 deletions
diff --git a/‎R/R/check_consensus.R
Lines changed: 29 additions & 6 deletions b/‎R/R/check_consensus.R
Lines changed: 29 additions & 6 deletions
diff --git a/‎R/R/process_anthropic.R
Lines changed: 19 additions & 7 deletions b/‎R/R/process_anthropic.R
Lines changed: 19 additions & 7 deletions
diff --git a/‎R/R/process_deepseek.R
Lines changed: 19 additions & 7 deletions b/‎R/R/process_deepseek.R
Lines changed: 19 additions & 7 deletions
diff --git a/‎R/R/process_gemini.R
Lines changed: 56 additions & 24 deletions b/‎R/R/process_gemini.R
Lines changed: 56 additions & 24 deletions
diff --git a/‎R/R/process_grok.R
Lines changed: 20 additions & 13 deletions b/‎R/R/process_grok.R
Lines changed: 20 additions & 13 deletions
@@ -1,7 +1,7 @@
 Package: mLLMCelltype
 Type: Package
 Title: Cell Type Annotation Using Large Language Models
-Version: 1.2.3
+Version: 1.2.4
 Author: Chen Yang [aut, cre]
 Maintainer: Chen Yang <[email protected]>
 Authors@R:
 
@@ -1,5 +1,23 @@
 # mLLMCelltype Changelog
 
+## 1.2.4 (2025-05-25)
+
+### Critical Bug Fixes
+* **Fixed major `as.logical(from)` error**: Resolved a critical error that occurred when processing large numbers of clusters (60+), caused by non-character data being passed to `strsplit()`
+* **Enhanced error handling for API responses**: Added comprehensive `tryCatch()` blocks around all `strsplit()` operations in API processing functions
+* **Improved response validation**: Added robust type checking for API responses to prevent function/closure types from being processed as character strings
+
+### Improvements
+* **Enhanced API processing robustness**: The API processing functions in all provider files (`process_openrouter.R`, `process_anthropic.R`, `process_openai.R`, `process_deepseek.R`, `process_qwen.R`, `process_stepfun.R`, `process_minimax.R`, `process_zhipu.R`, `process_gemini.R`, `process_grok.R`) now include improved error handling
+* **Better NULL value handling**: Provider functions now filter out NULL chunk results before calling `unlist()` and return an explicit error string when no valid responses remain
+* **Enhanced logging**: Added more detailed error logging for debugging API response issues
+* **Improved consensus checking**: Enhanced `check_consensus.R` to handle edge cases with malformed responses
+
+### Technical Details
+* Fixed issue where large cluster datasets could cause type coercion errors in response parsing
+* Added validation for function/closure types in API responses to prevent downstream errors
+* Improved error messages to provide better diagnostics for API response issues
+
 ## 1.2.3 (2025-05-10)
 
 ### Bug Fixes
 
@@ -136,19 +136,42 @@ check_consensus <- function(round_responses, api_keys = NULL, controversy_thresh
 
   # Directly parse the response using a simpler approach
   # First, check if response is NULL or empty
-  if (is.null(response) || length(response) == 0 || nchar(response) == 0) {
-    write_log("WARNING: Response is NULL, empty, or has zero length")
+  if (is.null(response) || length(response) == 0) {
+    write_log("WARNING: Response is NULL or has zero length")
     lines <- c("0", "0", "0", "Unknown")
   } else if (!is.character(response)) {
     # If response is not a character, convert it to string
     write_log(sprintf("WARNING: Response is not a character but %s, converting to string", typeof(response)))
-    response <- as.character(response)
+    # Handle different types more carefully
+    if (is.function(response)) {
+      write_log("ERROR: Response is a function (closure), this indicates a serious error in the API response processing")
+      lines <- c("0", "0", "0", "Unknown")
+    } else {
+      tryCatch({
+        response <- as.character(response)
+        if (nchar(response) == 0) {
+          lines <- c("0", "0", "0", "Unknown")
+        } else {
+          lines <- c(response)
+        }
+      }, error = function(e) {
+        write_log(sprintf("ERROR: Failed to convert response to character: %s", e$message))
+        lines <- c("0", "0", "0", "Unknown")
+      })
+    }
+  } else if (nchar(response) == 0) {
+    write_log("WARNING: Response is empty string")
     lines <- c("0", "0", "0", "Unknown")
   } else if (grepl("\n", response)) {
     # Split by newlines and clean up
-    lines <- strsplit(response, "\n")[[1]]
-    lines <- trimws(lines)
-    lines <- lines[nchar(lines) > 0]
+    tryCatch({
+      lines <- strsplit(response, "\n")[[1]]
+      lines <- trimws(lines)
+      lines <- lines[nchar(lines) > 0]
+    }, error = function(e) {
+      write_log(sprintf("ERROR: Failed to split response by newlines: %s", e$message))
+      lines <- c("0", "0", "0", "Unknown")
+    })
   } else {
     # If no newlines, treat as a single line
     lines <- c(response)
 
@@ -83,16 +83,28 @@ process_anthropic <- function(prompt, model, api_key) {
       write_log("ERROR: Response content is not a character string")
       write_log(sprintf("Response content type: %s", typeof(response_content)))
       write_log(sprintf("Response content structure: %s", jsonlite::toJSON(content$content[[1]], auto_unbox = TRUE, pretty = TRUE)))
-      return(NULL)
+      return(c("Error: Invalid response format"))
     }
 
-    res <- strsplit(response_content, '\n')[[1]]
-    write_log(sprintf("Got response with %d lines", length(res)))
-    write_log(sprintf("Raw response from Claude:\n%s", paste(res, collapse = "\n")))
-
-    res
+    tryCatch({
+      res <- strsplit(response_content, '\n')[[1]]
+      write_log(sprintf("Got response with %d lines", length(res)))
+      write_log(sprintf("Raw response from Claude:\n%s", paste(res, collapse = "\n")))
+      res
+    }, error = function(e) {
+      write_log(sprintf("ERROR: Failed to split response content: %s", e$message))
+      return(c("Error: Failed to parse response"))
+    })
   }, simplify = FALSE)
 
   write_log("All chunks processed successfully")
-  return(gsub(',$', '', unlist(allres)))
+
+  # Filter out NULL values and handle errors more gracefully
+  valid_results <- allres[!sapply(allres, is.null)]
+  if (length(valid_results) == 0) {
+    write_log("ERROR: No valid responses received from Anthropic")
+    return(c("Error: No valid responses"))
+  }
+
+  return(gsub(',$', '', unlist(valid_results)))
 }
@@ -80,17 +80,29 @@ process_deepseek <- function(prompt, model, api_key) {
       write_log("ERROR: Response content is not a character string")
       write_log(sprintf("Response content type: %s", typeof(response_content)))
       write_log(sprintf("Response content structure: %s", jsonlite::toJSON(content$choices[[1]]$message, auto_unbox = TRUE, pretty = TRUE)))
-      return(NULL)
+      return(c("Error: Invalid response format"))
     }
 
     # DeepSeek's response is in content$choices[[1]]$message$content
-    res <- strsplit(response_content, '\n')[[1]]
-    write_log(sprintf("Got response with %d lines", length(res)))
-    write_log(sprintf("Raw response from DeepSeek:\n%s", paste(res, collapse = "\n")))
-
-    res
+    tryCatch({
+      res <- strsplit(response_content, '\n')[[1]]
+      write_log(sprintf("Got response with %d lines", length(res)))
+      write_log(sprintf("Raw response from DeepSeek:\n%s", paste(res, collapse = "\n")))
+      res
+    }, error = function(e) {
+      write_log(sprintf("ERROR: Failed to split response content: %s", e$message))
+      return(c("Error: Failed to parse response"))
+    })
   }, simplify = FALSE)
 
   write_log("All chunks processed successfully")
-  return(gsub(',$', '', unlist(allres)))
+
+  # Filter out NULL values and handle errors more gracefully
+  valid_results <- allres[!sapply(allres, is.null)]
+  if (length(valid_results) == 0) {
+    write_log("ERROR: No valid responses received from DeepSeek")
+    return(c("Error: No valid responses"))
+  }
+
+  return(gsub(',$', '', unlist(valid_results)))
 }
@@ -3,36 +3,36 @@
 process_gemini <- function(prompt, model, api_key) {
   write_log("\n=== Starting Gemini API Request ===\n")
   write_log(sprintf("Model: %s", model))
-  
+
   # Gemini API endpoint
   base_url <- "https://generativelanguage.googleapis.com/v1beta/models"
   url <- sprintf("%s/%s:generateContent?key=%s", base_url, model, api_key)
   write_log("API URL:")
   write_log(url)
-  
+
   # Process all input at once
   input_lines <- strsplit(prompt, "\n")[[1]]
   write_log("\nInput lines:")
   write_log(paste(input_lines, collapse = "\n"))
-  
+
   cutnum <- 1  # Changed to always use 1 chunk
   write_log(sprintf("\nProcessing input in %d chunk(s)", cutnum))
-  
+
   if (cutnum > 1) {
-    cid <- as.numeric(cut(1:length(input_lines), cutnum))	
+    cid <- as.numeric(cut(1:length(input_lines), cutnum))
   } else {
     cid <- rep(1, length(input_lines))
   }
-  
+
   # Process each chunk
   allres <- sapply(1:cutnum, function(i) {
     write_log(sprintf("\nProcessing chunk %d of %d", i, cutnum))
     id <- which(cid == i)
-    
+
     chunk_content <- paste(input_lines[id], collapse = '\n')
     write_log("\nChunk content:")
     write_log(chunk_content)
-    
+
     # Prepare the request body
     body <- list(
       contents = list(
@@ -45,10 +45,10 @@ process_gemini <- function(prompt, model, api_key) {
         )
       )
     )
-    
+
     write_log("\nRequest body:")
     write_log(jsonlite::toJSON(body, auto_unbox = TRUE, pretty = TRUE))
-    
+
     write_log("\nSending API request...")
     # Make the API request
     response <- httr::POST(
@@ -59,19 +59,19 @@ process_gemini <- function(prompt, model, api_key) {
       body = jsonlite::toJSON(body, auto_unbox = TRUE),
       encode = "json"
     )
-    
+
     # Check for errors
     if (httr::http_error(response)) {
       error_message <- httr::content(response, "parsed")
-      write_log(sprintf("ERROR: Gemini API request failed: %s", 
+      write_log(sprintf("ERROR: Gemini API request failed: %s",
                        if (!is.null(error_message$error$message)) error_message$error$message else "Unknown error"))
       return(NULL)
     }
-    
+
     write_log("Parsing API response...")
     # Parse the response
     content <- httr::content(response, "parsed")
-    
+
     # Add robust error handling for response structure
     tryCatch({
       # Check if the response has the expected structure
@@ -80,23 +80,47 @@ process_gemini <- function(prompt, model, api_key) {
         write_log(sprintf("Response content: %s", jsonlite::toJSON(content, auto_unbox = TRUE, pretty = TRUE)))
         return("Error: Unexpected API response structure")
       }
-      
+
       candidate <- content$candidates[[1]]
-      
+
       # For Gemini 1.0 models
       if (!is.null(candidate$content$parts[[1]]$text)) {
-        res <- strsplit(candidate$content$parts[[1]]$text, '\n')[[1]]
-      } 
+        text_content <- candidate$content$parts[[1]]$text
+        if (is.character(text_content)) {
+          res <- strsplit(text_content, '\n')[[1]]
+        } else {
+          write_log("ERROR: Text content is not a character string")
+          return("Error: Invalid text content format")
+        }
+      }
       # For Gemini 1.5/2.5 models (may have different structure)
       else if (!is.null(candidate$text)) {
-        res <- strsplit(candidate$text, '\n')[[1]]
+        text_content <- candidate$text
+        if (is.character(text_content)) {
+          res <- strsplit(text_content, '\n')[[1]]
+        } else {
+          write_log("ERROR: Text content is not a character string")
+          return("Error: Invalid text content format")
+        }
       }
       # Try other possible response structures
       else if (!is.null(candidate$content$text)) {
-        res <- strsplit(candidate$content$text, '\n')[[1]]
+        text_content <- candidate$content$text
+        if (is.character(text_content)) {
+          res <- strsplit(text_content, '\n')[[1]]
+        } else {
+          write_log("ERROR: Text content is not a character string")
+          return("Error: Invalid text content format")
+        }
       }
       else if (!is.null(content$text)) {
-        res <- strsplit(content$text, '\n')[[1]]
+        text_content <- content$text
+        if (is.character(text_content)) {
+          res <- strsplit(text_content, '\n')[[1]]
+        } else {
+          write_log("ERROR: Text content is not a character string")
+          return("Error: Invalid text content format")
+        }
       }
       else {
         # If we can't find the text in any expected location, log the structure and return an error
@@ -111,10 +135,18 @@ process_gemini <- function(prompt, model, api_key) {
     })
     write_log(sprintf("Got response with %d lines", length(res)))
     write_log(sprintf("Raw response from Gemini:\n%s", paste(res, collapse = "\n")))
-    
+
     res
   }, simplify = FALSE)
-  
+
   write_log("All chunks processed successfully")
-  return(gsub(',$', '', unlist(allres)))
+
+  # Filter out NULL values and handle errors more gracefully
+  valid_results <- allres[!sapply(allres, is.null)]
+  if (length(valid_results) == 0) {
+    write_log("ERROR: No valid responses received from Gemini")
+    return(c("Error: No valid responses"))
+  }
+
+  return(gsub(',$', '', unlist(valid_results)))
 }
@@ -109,21 +109,28 @@ process_grok <- function(prompt, model, api_key) {
       write_log("ERROR: Response content is not a character string")
       write_log(sprintf("Response content type: %s", typeof(response_content)))
       write_log(sprintf("Response content structure: %s", jsonlite::toJSON(content$choices[[1]]$message, auto_unbox = TRUE, pretty = TRUE)))
-      return(NULL)
-    }
-
-    res <- strsplit(response_content, '\n')[[1]]
-
-    # Log usage information if available
-    if (!is.null(content$usage)) {
-      write_log(sprintf("Tokens used - Prompt: %d, Completion: %d, Total: %d",
-                      content$usage$prompt_tokens,
-                      content$usage$completion_tokens,
-                      content$usage$total_tokens))
+      return(c("Error: Invalid response format"))
     }
 
-    write_log(sprintf("Got response with %d lines", length(res)))
-    write_log(sprintf("Raw response from Grok:\n%s", paste(res, collapse = "\n")))
+    res <- tryCatch({
+      split_result <- strsplit(response_content, '\n')[[1]]
+
+      # Log usage information if available
+      if (!is.null(content$usage)) {
+        write_log(sprintf("Tokens used - Prompt: %d, Completion: %d, Total: %d",
+                        content$usage$prompt_tokens,
+                        content$usage$completion_tokens,
+                        content$usage$total_tokens))
+      }
+
+      write_log(sprintf("Got response with %d lines", length(split_result)))
+      write_log(sprintf("Raw response from Grok:\n%s", paste(split_result, collapse = "\n")))
+
+      split_result
+    }, error = function(e) {
+      write_log(sprintf("ERROR: Failed to split response content: %s", e$message))
+      return(c("Error: Failed to parse response"))
+    })
 
     res
   }, simplify = FALSE)