Skip to content

Commit bd38279

Browse files
authored
feat(dynamo-run): Various UX improvements (ai-dynamo#168)
Engines mistralrs, sglang and vllm are included by default. They can be disabled like this: `cargo build --no-default-features --features <add-back-what-you-want>`.

Added a `--features vulkan` option, for llamacpp.

Build-time message if CUDA or Metal would help and are missing. That's the best we can do:
> warning: dynamo-run@<version>: CUDA not enabled, re-run with `--features cuda`

Runtime message if CUDA, Metal or Vulkan are enabled:
> 2025-03-14T21:59:26.501937Z INFO dynamo_run: CUDA on

Runtime message if they are missing:
> 2025-03-14T22:02:37.439404Z INFO dynamo_run: CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance

Default engine message includes available engines:
> 2025-03-14T21:59:26.503612Z INFO dynamo_run: Using default engine: mistralrs. Use out=<engine> to specify one of echo_core, echo_full, mistralrs, llamacpp, sglang, vllm, pystr, pytok

The really important outcome is that this should now "just work":
```
cargo install dynamo-run
dynamo-run Qwen/Qwen2.5-3B-Instruct
```

Sadly you still need `--features cuda|metal` for performance; I couldn't automate that.
1 parent 27afbb9 commit bd38279

File tree

7 files changed

+107
-15
lines changed

7 files changed

+107
-15
lines changed

.github/workflows/pre-merge-rust.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ jobs:
8484
working-directory: ${{ matrix.dir }}
8585
run: |
8686
cargo-deny --version || cargo install [email protected]
87-
cargo-deny check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
87+
cargo-deny --no-default-features check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
8888
- name: Run Unit Tests
8989
working-directory: ${{ matrix.dir }}
9090
run: cargo test --locked --all-targets

deny.toml

+11-1
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,20 @@ allow = [
2828
"OpenSSL",
2929
"Unicode-3.0",
3030
"BSL-1.0",
31-
"MPL-2.0"
31+
"MPL-2.0",
32+
"MIT-0"
3233
]
3334

35+
# TODO exceptions
36+
# MIT: https://github.com/guidance-ai/llguidance
37+
# "llguidance",
38+
# MIT: https://github.com/guidance-ai/llguidance/toktrie
39+
# "toktrie",
40+
# MIT: https://github.com/guidance-ai/llguidance/toktrie_hf_tokenizers
41+
# "toktrie_hf_tokenizers",
42+
3443
[[licenses.clarify]]
44+
3545
name = "ring"
3646
expression = "MIT AND ISC AND OpenSSL"
3747
license-files = [

launch/dynamo-run/Cargo.toml

+5-1
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,18 @@ license.workspace = true
2323
repository.workspace = true
2424

2525
[features]
26+
# Build with `--no-default-features` to disable these defaults
27+
default = ["mistralrs", "vllm", "sglang"]
2628
mistralrs = ["dynamo-llm/mistralrs"]
2729
sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
2830
vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
31+
# We don't include llamacpp by default until we figure out when it needs external libraries
2932
llamacpp = ["dynamo-llm/llamacpp"]
3033
trtllm = ["dynamo-llm/trtllm"]
3134
python = ["dynamo-llm/python"]
3235
cuda = ["dynamo-llm/cuda"]
3336
metal = ["dynamo-llm/metal"]
37+
vulkan = ["dynamo-llm/vulkan"]
3438

3539
[dependencies]
3640
dynamo-llm = { workspace = true }
@@ -55,4 +59,4 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time",
5559

5660
[target.x86_64-unknown-linux-gnu.dependencies]
5761
netlink-packet-route = { version = "0.19", optional = true }
58-
rtnetlink = { version = "0.14", optional = true }
62+
rtnetlink = { version = "0.14", optional = true }

launch/dynamo-run/build.rs

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
use std::env;
17+
use std::process::Command;
18+
19+
fn main() {
20+
if has_cuda_toolkit() && !has_feature("cuda") && is_cuda_engine() {
21+
println!("cargo:warning=CUDA not enabled, re-run with `--features cuda`");
22+
}
23+
if is_mac() && !has_feature("metal") {
24+
println!("cargo:warning=Metal not enabled, re-run with `--features metal`");
25+
}
26+
}
27+
28+
/// Reports whether the Cargo feature named `s` is enabled for this build.
///
/// Cargo tells build scripts about each enabled feature through an
/// environment variable `CARGO_FEATURE_<NAME>`, where `<NAME>` is the feature
/// name upper-cased with every `-` replaced by `_`. The same mapping is
/// applied here so hyphenated feature names are looked up correctly.
fn has_feature(s: &str) -> bool {
    let key = format!("CARGO_FEATURE_{}", s.to_uppercase().replace('-', "_"));
    // Only presence matters; `var_os` also avoids treating a value that is not
    // valid Unicode as if the variable were unset.
    env::var_os(key).is_some()
}
31+
32+
/// Probes for the CUDA compiler by running `nvcc --version`.
///
/// Returns `true` only when the command could be started and exited with a
/// success status; a failure to launch it is treated as "no toolkit".
fn has_cuda_toolkit() -> bool {
    Command::new("nvcc")
        .arg("--version")
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false)
}
39+
40+
fn is_cuda_engine() -> bool {
41+
has_feature("mistralrs") || has_feature("llamacpp")
42+
}
43+
44+
/// Reports whether the crate is being built *for* macOS.
///
/// A build script is compiled for and run on the host, so a
/// `#[cfg(target_os = ...)]` inside it describes the host machine rather than
/// the compilation target. Cargo passes the real target OS in
/// `CARGO_CFG_TARGET_OS`, which is preferred here. When that variable is not
/// set (for example, the script is run outside Cargo) the host OS is used,
/// matching the previous behaviour.
fn is_mac() -> bool {
    env::var("CARGO_CFG_TARGET_OS").map_or(cfg!(target_os = "macos"), |os| os == "macos")
}

launch/dynamo-run/src/main.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,25 @@ fn main() -> anyhow::Result<()> {
108108
}
109109
}
110110
}
111+
#[cfg(any(feature = "mistralrs", feature = "llamacpp"))]
112+
{
113+
#[cfg(feature = "cuda")]
114+
{
115+
tracing::info!("CUDA on");
116+
}
117+
#[cfg(feature = "metal")]
118+
{
119+
tracing::info!("Metal on");
120+
}
121+
#[cfg(feature = "vulkan")]
122+
{
123+
tracing::info!("Vulkan on");
124+
}
125+
#[cfg(not(any(feature = "cuda", feature = "metal", feature = "vulkan")))]
126+
tracing::info!(
127+
"CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance"
128+
);
129+
}
111130

112131
// max_worker_threads and max_blocking_threads from env vars or config file.
113132
let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
@@ -165,7 +184,8 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
165184
None => {
166185
let default_engine = Output::default(); // smart default based on feature flags
167186
tracing::info!(
168-
"Using default engine: {default_engine}. Use out=<engine> to specify an engine."
187+
"Using default engine: {default_engine}. Use out=<engine> to specify one of {}",
188+
Output::available_engines().join(", ")
169189
);
170190
default_engine
171191
}

launch/dynamo-run/src/opt.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -204,20 +204,15 @@ impl fmt::Display for Output {
204204
}
205205
}
206206

207-
/// Returns the engine to use if user did not say on cmd line
208-
/// Uses whatever was compiled in, with a priority ordering.
207+
/// Returns the engine to use if user did not say on cmd line.
208+
/// Nearly always defaults to mistralrs which has no dependencies and we include by default.
209+
/// If built with --no-default-features and a specific engine, default to that.
209210
#[allow(unused_assignments, unused_mut)]
210211
impl Default for Output {
211212
fn default() -> Self {
212213
// Default if no engines
213214
let mut out = Output::EchoFull;
214215

215-
// Runs everywhere but needs local CUDA to build
216-
#[cfg(feature = "mistralrs")]
217-
{
218-
out = Output::MistralRs;
219-
}
220-
221216
#[cfg(feature = "llamacpp")]
222217
{
223218
out = Output::LlamaCpp;
@@ -233,6 +228,11 @@ impl Default for Output {
233228
out = Output::Vllm;
234229
}
235230

231+
#[cfg(feature = "mistralrs")]
232+
{
233+
out = Output::MistralRs;
234+
}
235+
236236
out
237237
}
238238
}

lib/llm/src/engines.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,14 @@ impl Default for MultiNodeConfig {
5151
}
5252
}
5353

54-
#[cfg(feature = "python")]
54+
#[cfg(any(feature = "sglang", feature = "vllm", feature = "python"))]
5555
use pyo3::prelude::*;
5656

5757
/// On Mac embedded Python interpreters do not pick up the virtual env.
58-
#[cfg(all(target_os = "macos", feature = "python"))]
58+
#[cfg(all(
59+
target_os = "macos",
60+
any(feature = "sglang", feature = "vllm", feature = "python")
61+
))]
5962
fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
6063
let version_info = py.version_info();
6164
let sys: PyObject = py.import("sys")?.into();
@@ -69,5 +72,8 @@ fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
6972
Ok(())
7073
}
7174

72-
#[cfg(all(target_os = "linux", feature = "python"))]
75+
#[cfg(all(
76+
target_os = "linux",
77+
any(feature = "sglang", feature = "vllm", feature = "python")
78+
))]
7379
fn fix_venv(_venv: String, _py: Python<'_>) {}

0 commit comments

Comments
 (0)