feat(dynamo-run): Various UX improvements (ai-dynamo#168)

grahamking · web-flow · commit bd38279d2e02 · 2025-03-14T23:23:00.000Z
Engines mistralrs, sglang and vllm are now included by default. They can be disabled like this: `cargo build --no-default-features --features <add-back-what-you-want>`.

Added `--features vulkan` option, for llamacpp.

Build time message if CUDA or Metal would help and are missing. That's the best we can do:
> warning: dynamo-run@0.1.0: CUDA not enabled, re-run with `--features cuda`

Runtime message if CUDA, Metal or Vulkan are enabled:
> 2025-03-14T21:59:26.501937Z  INFO dynamo_run: CUDA on

Runtime message if they are missing:
> 2025-03-14T22:02:37.439404Z  INFO dynamo_run: CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance

Default engine message includes available engines:
> 2025-03-14T21:59:26.503612Z  INFO dynamo_run: Using default engine: mistralrs. Use out=<engine> to specify one of echo_core, echo_full, mistralrs, llamacpp, sglang, vllm, pystr, pytok

The really important outcome is that this should now "just work":
```
cargo install dynamo-run
dynamo-run Qwen/Qwen2.5-3B-Instruct
```

Sadly you still need `--features cuda|metal` for performance, I couldn't automate that.
diff --git a/.github/workflows/pre-merge-rust.yml b/.github/workflows/pre-merge-rust.yml
@@ -84,7 +84,7 @@ jobs:
       working-directory: ${{ matrix.dir }}
       run: |
         cargo-deny --version || cargo install cargo-deny@0.16.4
-        cargo-deny check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
+        cargo-deny --no-default-features check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
     - name: Run Unit Tests
       working-directory: ${{ matrix.dir }}
       run: cargo test --locked --all-targets
diff --git a/deny.toml b/deny.toml
@@ -28,10 +28,20 @@ allow = [
     "OpenSSL",
     "Unicode-3.0",
     "BSL-1.0",
-    "MPL-2.0"
+    "MPL-2.0",
+    "MIT-0"
 ]
 
+# TODO exceptions
+# MIT: https://github.com/guidance-ai/llguidance
+#  "llguidance",
+# MIT: https://github.com/guidance-ai/llguidance/toktrie
+#  "toktrie",
+# MIT: https://github.com/guidance-ai/llguidance/toktrie_hf_tokenizers
+#  "toktrie_hf_tokenizers",
+
 [[licenses.clarify]]
+
 name = "ring"
 expression = "MIT AND ISC AND OpenSSL"
 license-files = [
diff --git a/launch/dynamo-run/Cargo.toml b/launch/dynamo-run/Cargo.toml
@@ -23,14 +23,18 @@ license.workspace = true
 repository.workspace = true
 
 [features]
+# Build with `--no-default-features` to disable these defaults
+default = ["mistralrs", "vllm", "sglang"]
 mistralrs = ["dynamo-llm/mistralrs"]
 sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
 vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
+# We don't include llamacpp by default until we figure out when it needs external libraries
 llamacpp = ["dynamo-llm/llamacpp"]
 trtllm = ["dynamo-llm/trtllm"]
 python = ["dynamo-llm/python"]
 cuda = ["dynamo-llm/cuda"]
 metal = ["dynamo-llm/metal"]
+vulkan = ["dynamo-llm/vulkan"]
 
 [dependencies]
 dynamo-llm = { workspace = true }
@@ -55,4 +59,4 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time",
 
 [target.x86_64-unknown-linux-gnu.dependencies]
 netlink-packet-route = { version = "0.19", optional = true }
-rtnetlink = { version = "0.14", optional = true }
+rtnetlink = { version = "0.14", optional = true }
diff --git a/launch/dynamo-run/build.rs b/launch/dynamo-run/build.rs
@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::env;
+use std::process::Command;
+
+/// Emits build-time `cargo:warning` hints when a GPU backend could be used but its
+/// cargo feature is not enabled.
+fn main() {
+    // The runtime "CUDA on" / "Metal on" messages in main.rs are only compiled for the
+    // mistralrs and llamacpp engines, so only hint when one of those is being built.
+    if !is_cuda_engine() {
+        return;
+    }
+    // Check the feature flag (an env lookup) before probing for `nvcc`, so the external
+    // process is only spawned when a warning is still possible.
+    if !has_feature("cuda") && has_cuda_toolkit() {
+        println!("cargo:warning=CUDA not enabled, re-run with `--features cuda`");
+    }
+    if is_mac() && !has_feature("metal") {
+        println!("cargo:warning=Metal not enabled, re-run with `--features metal`");
+    }
+}
+
+/// Reports whether cargo feature `s` is enabled for this build.
+///
+/// Cargo exposes each enabled feature to build scripts as `CARGO_FEATURE_<NAME>`, with
+/// the name upper-cased and `-` replaced by `_`.
+fn has_feature(s: &str) -> bool {
+    let var_name = format!("CARGO_FEATURE_{}", s.to_uppercase().replace('-', "_"));
+    // Only presence matters, so skip the UTF-8 validation that `env::var` performs.
+    env::var_os(var_name).is_some()
+}
+
+/// Probes for the CUDA toolkit by running `nvcc --version`.
+///
+/// Returns `true` only if the command could be launched and exited successfully.
+fn has_cuda_toolkit() -> bool {
+    Command::new("nvcc")
+        .arg("--version")
+        .output()
+        .map(|probe| probe.status.success())
+        .unwrap_or(false)
+}
+
+/// True when at least one of the `mistralrs` or `llamacpp` engine features is enabled.
+fn is_cuda_engine() -> bool {
+    ["mistralrs", "llamacpp"]
+        .iter()
+        .any(|engine| has_feature(engine))
+}
+
+/// Reports whether the crate is being compiled *for* macOS.
+///
+/// A build script is compiled for and runs on the host, so `#[cfg(target_os = "macos")]`
+/// here would describe the machine running cargo, not the compilation target. Cargo
+/// passes the target OS to build scripts in `CARGO_CFG_TARGET_OS`; if it is unset
+/// (not running under cargo) this returns `false`.
+fn is_mac() -> bool {
+    matches!(env::var("CARGO_CFG_TARGET_OS").as_deref(), Ok("macos"))
+}
diff --git a/launch/dynamo-run/src/main.rs b/launch/dynamo-run/src/main.rs
@@ -108,6 +108,25 @@ fn main() -> anyhow::Result<()> {
             }
         }
     }
+    #[cfg(any(feature = "mistralrs", feature = "llamacpp"))]
+    {
+        #[cfg(feature = "cuda")]
+        {
+            tracing::info!("CUDA on");
+        }
+        #[cfg(feature = "metal")]
+        {
+            tracing::info!("Metal on");
+        }
+        #[cfg(feature = "vulkan")]
+        {
+            tracing::info!("Vulkan on");
+        }
+        #[cfg(not(any(feature = "cuda", feature = "metal", feature = "vulkan")))]
+        tracing::info!(
+            "CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance"
+        );
+    }
 
     // max_worker_threads and max_blocking_threads from env vars or config file.
     let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
@@ -165,7 +184,8 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
         None => {
             let default_engine = Output::default(); // smart default based on feature flags
             tracing::info!(
-                "Using default engine: {default_engine}. Use out=<engine> to specify an engine."
+                "Using default engine: {default_engine}. Use out=<engine> to specify one of {}",
+                Output::available_engines().join(", ")
             );
             default_engine
         }
diff --git a/launch/dynamo-run/src/opt.rs b/launch/dynamo-run/src/opt.rs
@@ -204,20 +204,15 @@ impl fmt::Display for Output {
     }
 }
 
-/// Returns the engine to use if user did not say on cmd line
-/// Uses whatever was compiled in, with a priority ordering.
+/// Returns the engine to use if user did not say on cmd line.
+/// Nearly always defaults to mistralrs which has no dependencies and we include by default.
+/// If built with --no-default-features and a specific engine, default to that.
 #[allow(unused_assignments, unused_mut)]
 impl Default for Output {
     fn default() -> Self {
         // Default if no engines
         let mut out = Output::EchoFull;
 
-        // Runs everywhere but needs local CUDA to build
-        #[cfg(feature = "mistralrs")]
-        {
-            out = Output::MistralRs;
-        }
-
         #[cfg(feature = "llamacpp")]
         {
             out = Output::LlamaCpp;
@@ -233,6 +228,11 @@ impl Default for Output {
             out = Output::Vllm;
         }
 
+        #[cfg(feature = "mistralrs")]
+        {
+            out = Output::MistralRs;
+        }
+
         out
     }
 }
diff --git a/lib/llm/src/engines.rs b/lib/llm/src/engines.rs
@@ -51,11 +51,14 @@ impl Default for MultiNodeConfig {
     }
 }
 
-#[cfg(feature = "python")]
+#[cfg(any(feature = "sglang", feature = "vllm", feature = "python"))]
 use pyo3::prelude::*;
 
 /// On Mac embedded Python interpreters do not pick up the virtual env.
-#[cfg(all(target_os = "macos", feature = "python"))]
+#[cfg(all(
+    target_os = "macos",
+    any(feature = "sglang", feature = "vllm", feature = "python")
+))]
 fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
     let version_info = py.version_info();
     let sys: PyObject = py.import("sys")?.into();
@@ -69,5 +72,8 @@ fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
     Ok(())
 }
 
-#[cfg(all(target_os = "linux", feature = "python"))]
+#[cfg(all(
+    target_os = "linux",
+    any(feature = "sglang", feature = "vllm", feature = "python")
+))]
 fn fix_venv(_venv: String, _py: Python<'_>) {}

Original file line number	Diff line number	Diff line change
`@@ -204,20 +204,15 @@ impl fmt::Display for Output {`
`204`	`204`	`}`
`205`	`205`	`}`
`206`	`206`
`207`		`-/// Returns the engine to use if user did not say on cmd line`
`208`		`-/// Uses whatever was compiled in, with a priority ordering.`
	`207`	`+/// Returns the engine to use if user did not say on cmd line.`
	`208`	`+/// Nearly always defaults to mistralrs which has no dependencies and we include by default.`
	`209`	`+/// If built with --no-default-features and a specific engine, default to that.`
`209`	`210`	`#[allow(unused_assignments, unused_mut)]`
`210`	`211`	`impl Default for Output {`
`211`	`212`	`fn default() -> Self {`
`212`	`213`	`// Default if no engines`
`213`	`214`	`let mut out = Output::EchoFull;`
`214`	`215`
`215`		`- // Runs everywhere but needs local CUDA to build`
`216`		`- #[cfg(feature = "mistralrs")]`
`217`		`- {`
`218`		`- out = Output::MistralRs;`
`219`		`- }`
`220`		`-`
`221`	`216`	`#[cfg(feature = "llamacpp")]`
`222`	`217`	`{`
`223`	`218`	`out = Output::LlamaCpp;`
`@@ -233,6 +228,11 @@ impl Default for Output {`
`233`	`228`	`out = Output::Vllm;`
`234`	`229`	`}`
`235`	`230`
	`231`	`+ #[cfg(feature = "mistralrs")]`
	`232`	`+ {`
	`233`	`+ out = Output::MistralRs;`
	`234`	`+ }`
	`235`	`+`
`236`	`236`	`out`
`237`	`237`	`}`
`238`	`238`	`}`