README.md: 29 additions & 29 deletions
@@ -99,7 +99,7 @@ curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh

Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides

- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals.
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
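The unified API layer is easiest to see from the client side. Below is a minimal sketch, assuming the `llama_stack_client` Python SDK, a stack server on the default port 8321, and a model id registered with that stack; the `inference.chat_completion` method and the response fields follow older SDK documentation and may differ in current releases.

```python
# Minimal sketch of calling the unified Inference API through the Python SDK.
# Assumptions: `llama_stack_client` is installed, a stack server is running on
# localhost:8321, and the model id below is registered with it (it is only an
# illustrative placeholder). Method and field names may vary by SDK release.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # hypothetical registered model
    messages=[{"role": "user", "content": "Summarize what Llama Stack provides."}],
)
print(response.completion_message.content)
```

Because the API layer is uniform, the same call works regardless of which inference provider the distribution is configured with.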
@@ -125,34 +125,34 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
Please check out the [full list](https://llamastack.github.io/docs/providers).

| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Post Training | Eval | DatasetIO |
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | |
| Cerebras | Hosted | | ✅ | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | |
| Groq | Hosted | | ✅ | | | | | |
| Ollama | Single Node | | ✅ | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | |
| PG Vector | Single Node | | | ✅ | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | |
| vLLM | Single Node | | ✅ | | | | | |
| OpenAI | Hosted | | ✅ | | | | | |
| Anthropic | Hosted | | ✅ | | | | | |
| Gemini | Hosted | | ✅ | | | | | |
| WatsonX | Hosted | | ✅ | | | | | |
| HuggingFace | Single Node | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | ✅ | ✅ | ✅ |

> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.
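Which rows of the table apply to a given deployment depends on that distribution's run config. A hedged sketch for inspecting the providers a running stack exposes, assuming the `llama_stack_client` SDK; the `providers.list()` call and the `api`/`provider_id`/`provider_type` fields are assumptions based on the `/v1/providers` endpoint and may differ by release:

```python
# Sketch: list the providers backing each API in a running distribution.
# Assumptions: `llama_stack_client` SDK, server on localhost:8321, and the
# providers.list() call with api/provider_id/provider_type fields.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for p in client.providers.list():
    print(p.api, p.provider_id, p.provider_type)
```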

benchmarking/k8s-benchmark/stack_run_config.yaml: 4 additions & 11 deletions
@@ -6,7 +6,6 @@ apis:
- inference
- files
- safety
- telemetry
- tool_runtime
- vector_io
providers:
@@ -63,12 +62,6 @@ providers:
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
@@ -80,8 +73,8 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: file_search-runtime
provider_type: inline::file_search-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
@@ -134,8 +127,8 @@ registered_resources:
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::file_search
provider_id: file_search-runtime
server:
port: 8323
telemetry:
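Since this run config renames the builtin tool group from `builtin::rag` to `builtin::file_search` and serves on port 8323, one way to confirm the registration after launching the stack is to list tool groups from a client. This is a sketch under the assumption that the `llama_stack_client` SDK exposes `toolgroups.list()` and that tool groups carry `identifier` and `provider_id` fields:

```python
# Sketch: confirm builtin::file_search is registered after launching a stack
# from stack_run_config.yaml above (server.port: 8323). The toolgroups.list()
# call and the field names are assumptions and may differ by SDK release.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8323")

for tg in client.toolgroups.list():
    print(tg.identifier, tg.provider_id)
```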
client-sdks/stainless/openapi.stainless.yml: 4 additions & 4 deletions
@@ -121,9 +121,9 @@ resources:
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
query_result: FileSearchResult
document: RAGDocument
query_config: RAGQueryConfig
query_config: FileSearchConfig
response_format: ResponseFormat
toolgroups:
models:
@@ -153,8 +153,8 @@ resources:
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
insert: post /v1/tool-runtime/file_search-tool/insert
query: post /v1/tool-runtime/file_search-tool/query

responses:
models:
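The Stainless mapping above keeps the `rag_tool` sub-resource name while pointing it at the renamed `/v1/tool-runtime/file_search-tool/*` endpoints, so generated client code should keep the same surface. A hedged sketch, assuming the `llama_stack_client` SDK's `tool_runtime.rag_tool.query` method and its `content`/`vector_db_ids` parameters (taken from the pre-rename API and possibly changed since):

```python
# Sketch: query the renamed file_search tool endpoint through the generated SDK.
# The method signature (content, vector_db_ids) mirrors the pre-rename RAG tool
# API and is an assumption; "my-docs" is a hypothetical vector DB id.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

result = client.tool_runtime.rag_tool.query(
    content="What does the install script do?",
    vector_db_ids=["my-docs"],
)
print(result)
```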