@@ -2475,7 +2475,6 @@ static bool llama_kv_cache_init(
2475
2475
static bool llama_kv_cache_find_slot(
2476
2476
struct llama_kv_cache & cache,
2477
2477
const struct llama_batch & batch) {
2478
- const uint32_t n_ctx = cache.size;
2479
2478
const uint32_t n_tokens = batch.n_tokens;
2480
2479
2481
2480
if (cache.recurrent) {
@@ -2526,16 +2525,16 @@ static bool llama_kv_cache_find_slot(
2526
2525
}
2527
2526
// otherwise, one cell per token.
2528
2527
2529
- if (n_tokens > n_ctx ) {
2530
- LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx =%d\n", __func__, n_tokens, n_ctx );
2528
+ if (n_tokens > cache.size ) {
2529
+ LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size =%d\n", __func__, n_tokens, cache.size );
2531
2530
return false;
2532
2531
}
2533
2532
2534
2533
uint32_t n_tested = 0;
2535
2534
2536
2535
while (true) {
2537
- if (cache.head + n_tokens > n_ctx ) {
2538
- n_tested += n_ctx - cache.head;
2536
+ if (cache.head + n_tokens > cache.size ) {
2537
+ n_tested += cache.size - cache.head;
2539
2538
cache.head = 0;
2540
2539
continue;
2541
2540
}
@@ -2554,7 +2553,7 @@ static bool llama_kv_cache_find_slot(
2554
2553
break;
2555
2554
}
2556
2555
2557
- if (n_tested >= n_ctx ) {
2556
+ if (n_tested >= cache.size ) {
2558
2557
//LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens);
2559
2558
return false;
2560
2559
}
0 commit comments