
Commit e0dbec0

Authored Mar 13, 2025
llama : refactor llama_context, llama_kv_cache, llm_build_context (ggml-org#12181)
* llama : refactor llama_context, llama_kv_cache, llm_build_context
* graph : don't mutate the KV cache during defrag
* context : reduce virtuals + remove test function
* context : move interface implementation to source file + factory
* graph : move KV cache build functions to llama_context impl
* graph : remove model reference from build_pooling
* graph : remove llama_model reference
* kv_cache : provide rope factors
* graph : rework inputs to use only unique_ptr, remove attn input abstraction
* context : remove llama_context_i abstraction
* context : clean-up
* graph : clean-up
* llama : remove redundant keywords (struct, enum)
* model : adapt gemma3
* graph : restore same attention ops as on master
* llama : remove TODO + fix indent
1 parent 2048b59 commit e0dbec0

46 files changed (+13785 −12072 lines)
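The two diffs shown below cover only 2 of the 46 changed files, but they capture the caller-visible side of the refactor: the per-context KV cache helpers were renamed from llama_kv_cache_* to llama_kv_self_*. A summary of the renames, based only on the call sites changed in these diffs:

// old name (before this commit)          new name (this commit)
// llama_kv_cache_can_shift(ctx)      ->  llama_kv_self_can_shift(ctx)
// llama_kv_cache_clear(ctx)          ->  llama_kv_self_clear(ctx)
// llama_kv_cache_seq_rm (ctx, ...)   ->  llama_kv_self_seq_rm (ctx, ...)
// llama_kv_cache_seq_add(ctx, ...)   ->  llama_kv_self_seq_add(ctx, ...)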
 

‎common/common.cpp

+3 −3
@@ -955,8 +955,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         return iparams;
     }
 
-    if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
-        LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__);
+    if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) {
+        LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
         params.ctx_shift = false;
     }
 
@@ -1060,7 +1060,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         if (llama_model_has_decoder(model)) {
             llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
         }
-        llama_kv_cache_clear(lctx);
+        llama_kv_self_clear(lctx);
         llama_synchronize(lctx);
         llama_perf_context_reset(lctx);
     }
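A minimal sketch of how a caller might adapt to the renamed API, mirroring the two updated call sites in common_init_from_params above. It assumes an already created llama_context * lctx; the helper names configure_ctx_shift and finish_warmup are illustrative and not part of llama.cpp:

#include <cstdio>
#include "llama.h"

// Assumption: `lctx` is a valid llama_context created elsewhere.
// `want_ctx_shift` plays the role of params.ctx_shift in the diff above.
static bool configure_ctx_shift(llama_context * lctx, bool want_ctx_shift) {
    // after this commit the capability is queried per context, not per model
    if (want_ctx_shift && !llama_kv_self_can_shift(lctx)) {
        fprintf(stderr, "KV cache shifting is not supported for this context, disabling it\n");
        return false;
    }
    return want_ctx_shift;
}

static void finish_warmup(llama_context * lctx) {
    // discard whatever the warmup decode left in the KV cache,
    // wait for pending work, and reset the performance counters
    llama_kv_self_clear(lctx);
    llama_synchronize(lctx);
    llama_perf_context_reset(lctx);
}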

‎common/speculative.cpp

+4 −4
@@ -173,7 +173,7 @@ llama_tokens common_speculative_gen_draft(
     result.reserve(params.n_draft);
 
     if (reuse_n == 0) {
-        llama_kv_cache_clear(ctx);
+        llama_kv_self_clear(ctx);
 
         prompt.clear();
     } else {
@@ -192,14 +192,14 @@
     }
 
     if (reuse_i > 0) {
-        llama_kv_cache_seq_rm (ctx, 0, 0, reuse_i);
-        llama_kv_cache_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
+        llama_kv_self_seq_rm (ctx, 0, 0, reuse_i);
+        llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
 
         prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
     }
 
     if (reuse_n < (int) prompt.size()) {
-        llama_kv_cache_seq_rm (ctx, 0, reuse_n, -1);
+        llama_kv_self_seq_rm (ctx, 0, reuse_n, -1);
 
         prompt.erase(prompt.begin() + reuse_n, prompt.end());
     }
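The speculative.cpp change uses the renamed sequence operations to keep a reused prefix of the draft context in place. A rough sketch of that pattern, lifted from common_speculative_gen_draft above and wrapped in a free function for illustration (the helper name keep_reused_prefix and its exact signature are assumptions, not library API):

#include <vector>
#include "llama.h"

// Keep tokens [reuse_i, reuse_i + reuse_n) of `prompt` in the KV cache of
// sequence 0 and shift them so the reused span starts at position 0.
static void keep_reused_prefix(llama_context * ctx, std::vector<llama_token> & prompt,
                               int reuse_i, int reuse_n) {
    if (reuse_n == 0) {
        // nothing to reuse: start the draft context from scratch
        llama_kv_self_clear(ctx);
        prompt.clear();
        return;
    }

    if (reuse_i > 0) {
        // drop cells before the reused span, then shift the span back to position 0
        llama_kv_self_seq_rm (ctx, 0, 0, reuse_i);
        llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i);

        prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
    }

    if (reuse_n < (int) prompt.size()) {
        // drop cells (and cached tokens) past the reused span
        llama_kv_self_seq_rm(ctx, 0, reuse_n, -1);

        prompt.erase(prompt.begin() + reuse_n, prompt.end());
    }
}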
