
Commit 27ebfca

llama : do not crash if there is no CPU backend (#13395)
* llama : do not crash if there is no CPU backend
* add checks to examples
1 parent 5c86c9e commit 27ebfca

7 files changed, 49 insertions(+), 14 deletions(-)
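
The same defensive pattern recurs at every call site below: look up the CPU device or backend first, and fail with an explicit "no CPU backend found" error instead of dereferencing a null pointer when the build contains no CPU backend. A minimal sketch of the pattern, using the ggml backend API that appears in these diffs; the helper name require_cpu_dev is hypothetical and not part of the commit:

#include <stdexcept>
#include <string>

#include "ggml-backend.h"

// Hypothetical helper illustrating the check added throughout this commit:
// look up the CPU device and fail loudly if ggml was built without a CPU backend.
static ggml_backend_dev_t require_cpu_dev(const char * func) {
    ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (cpu_dev == nullptr) {
        throw std::runtime_error(std::string(func) + ": no CPU backend found");
    }
    return cpu_dev;
}

Library code (src/*) throws so the error surfaces through the existing loader error paths, while the example in tools/main logs the error and returns a nonzero exit code.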

src/llama-adapter.cpp (+6)

@@ -253,6 +253,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     std::vector<ggml_backend_buffer_type_t> buft_extra;
     {
         auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (!cpu_dev) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
         auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);

         auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
@@ -291,6 +294,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
             LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, ggml_backend_buft_name(buft));

             auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+            if (!cpu_dev) {
+                throw std::runtime_error(format("%s: no CPU backend found", __func__));
+            }
             buft = ggml_backend_dev_buffer_type(cpu_dev);

             break;

src/llama-model-loader.cpp (+4)

@@ -823,6 +823,10 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
     mmaps_used.reserve(files.size());
     for (const auto & file : files) {
         auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
+        if (!reg) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
+
         auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
         std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
         mmaps_used.emplace_back(mapping->size(), 0);

src/llama-model.cpp (+13)

@@ -299,6 +299,10 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
     // add extra buffer types, only if no GPU device is present
     // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
     auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    if (cpu_dev == nullptr) {
+        throw std::runtime_error(format("%s: no CPU backend found", __func__));
+    }
+
     auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
     auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
         ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
@@ -1484,6 +1488,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     }

     ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    if (cpu_dev == nullptr) {
+        throw std::runtime_error(format("%s: no CPU backend found", __func__));
+    }
     const int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
@@ -1672,6 +1679,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             auto * buft_dev = ggml_backend_buft_get_device(buft);
             if (ml.use_mmap && buft_dev && buft == ggml_backend_dev_host_buffer_type(buft_dev)) {
                 auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+                if (!cpu_dev) {
+                    throw std::runtime_error("no CPU backend found");
+                }
                 buft = ggml_backend_dev_buffer_type(cpu_dev);
             }

@@ -4122,6 +4132,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         if (!dev) {
             // FIXME: workaround for CPU backend buft having a NULL device
             dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+            if (!dev) {
+                throw std::runtime_error(format("%s: no CPU backend found", __func__));
+            }
         }
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);

tools/main/main.cpp (+6, -1)

@@ -152,7 +152,12 @@ int main(int argc, char ** argv) {

     LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);

-    auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
+    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    if (!cpu_dev) {
+        LOG_ERR("%s: no CPU backend found\n", __func__);
+        return 1;
+    }
+    auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
     auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new");
     auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free");


tools/mtmd/clip.cpp (+8, -4)

@@ -352,9 +352,12 @@ struct clip_ctx {

     clip_ctx(clip_context_params & ctx_params) {
         backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
-        backend = ctx_params.use_gpu
-                    ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
-                    : nullptr;
+        if (!backend_cpu) {
+            throw std::runtime_error("failed to initialize CPU backend");
+        }
+        backend = ctx_params.use_gpu
+                    ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
+                    : nullptr;

         if (backend) {
             LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend));
@@ -2185,9 +2188,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity) {

 struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) {
     g_logger_state.verbosity_thold = ctx_params.verbosity;
-    clip_ctx * ctx_clip = new clip_ctx(ctx_params);
+    clip_ctx * ctx_clip = nullptr;

     try {
+        ctx_clip = new clip_ctx(ctx_params);
         clip_model_loader loader(fname, *ctx_clip);
         loader.load_hparams();
         loader.load_tensors();
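
In clip_init, the clip_ctx allocation now happens inside the existing try block, so an exception thrown by the constructor (such as the new "failed to initialize CPU backend" error) can be handled by the surrounding error path instead of escaping the function. A minimal sketch of that pattern, with hypothetical names (ctx_t, init_ctx) standing in for clip_ctx and clip_init:

#include <cstdio>
#include <stdexcept>

struct ctx_t {
    // Stand-in for a constructor that may throw, as clip_ctx now does.
    ctx_t() { throw std::runtime_error("failed to initialize CPU backend"); }
};

static ctx_t * init_ctx() {
    ctx_t * ctx = nullptr;
    try {
        ctx = new ctx_t();  // if the constructor throws, ctx stays nullptr and nothing leaks
    } catch (const std::exception & e) {
        std::fprintf(stderr, "init failed: %s\n", e.what());
        delete ctx;         // safe: deleting a null pointer is a no-op
        return nullptr;
    }
    return ctx;
}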

tools/mtmd/llava.cpp (+1)

@@ -212,6 +212,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     ggml_build_forward_expand(gf, flatten);

     ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
+    GGML_ASSERT(backend != nullptr && "failed to initialize CPU backend");
     ggml_backend_graph_compute(backend.get(), gf);

     struct ggml_tensor* result = ggml_graph_node(gf, -1);

tools/rpc/rpc-server.cpp (+11, -9)

@@ -237,15 +237,17 @@ static ggml_backend_t create_backend(const rpc_server_params & params) {
         backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
     }

-    fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
-
-    // set the number of threads
-    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
-    ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
-    if (reg) {
-        auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
-        if (ggml_backend_set_n_threads_fn) {
-            ggml_backend_set_n_threads_fn(backend, params.n_threads);
+    if (backend) {
+        fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
+
+        // set the number of threads
+        ggml_backend_dev_t dev = ggml_backend_get_device(backend);
+        ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
+        if (reg) {
+            auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
+            if (ggml_backend_set_n_threads_fn) {
+                ggml_backend_set_n_threads_fn(backend, params.n_threads);
+            }
         }
     }
