diff --git a/moxin-backend/src/fake_data.rs b/moxin-backend/src/fake_data.rs
index 752d7a1c..fa7fdea6 100644
--- a/moxin-backend/src/fake_data.rs
+++ b/moxin-backend/src/fake_data.rs
@@ -4,58 +4,72 @@ use chrono::NaiveDate;
 pub fn get_models() -> Vec<Model> {
     let open_hermes_files = vec![
         File {
+            id: "1".to_string(),
             name: "openhermes-2.5-mistral-7b.Q2_K.gguf".to_string(),
             size: "3.08 GB".to_string(),
             quantization: "Q2_K".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec![],
             featured: false,
         },
         File {
+            id: "2".to_string(),
             name: "openhermes-2.5-mistral-7b.Q3_K_S.gguf".to_string(),
             size: "3.16 GB".to_string(),
             quantization: "Q3_K_S".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec![],
             featured: false,
         },
         File {
+            id: "3".to_string(),
             name: "openhermes-2.5-mistral-7b.Q3_K_M.gguf".to_string(),
             size: "3.52 GB".to_string(),
             quantization: "Q3_K_M".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec![],
             featured: false,
         },
         File {
+            id: "4".to_string(),
             name: "openhermes-2.5-mistral-7b.Q3_K_L.gguf".to_string(),
             size: "3.82 GB".to_string(),
             quantization: "Q3_K_M".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec![],
             featured: false,
         },
         File {
+            id: "5".to_string(),
             name: "openhermes-2.5-mistral-7b.Q4_0.gguf".to_string(),
             size: "4.11 GB".to_string(),
             quantization: "Q4_0".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec![],
             featured: false,
         },
         File {
+            id: "6".to_string(),
             name: "stablelm-zephyr-3b.Q4_K_S.gguf".to_string(),
             size: "1.62 GB".to_string(),
             quantization: "Q4_K_S".to_string(),
             downloaded: true,
+            downloaded_path: Some("/home/user/.moxin/stablelm-zephyr-3b.Q4_K_S.gguf".to_string()),
             tags: vec!["Small & Fast".to_string()],
             featured: true,
         },
         File {
+            id: "7".to_string(),
             name: "stablelm-zephyr-3b.Q6_K.gguf".to_string(),
             size: "2.30 GB".to_string(),
             quantization: "Q6_K".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec!["Less Compressed".to_string(), "Might be slower".to_string()],
             featured: true,
         },
@@ -63,18 +77,22 @@ pub fn get_models() -> Vec<Model> {
 
     let nexus_raven_files = vec![
         File {
+            id: "8".to_string(),
             name: "nexusraven-v2-13b.Q4_K_S.gguf".to_string(),
             size: "7.41 GB".to_string(),
             quantization: "Q4_K_S".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec!["Small & Fast".to_string()],
             featured: true,
         },
         File {
+            id: "9".to_string(),
             name: "nexusraven-v2-13b.Q6_K.gguf".to_string(),
             size: "10.68 GB".to_string(),
             quantization: "Q6_K".to_string(),
             downloaded: true,
+            downloaded_path: Some("/home/user/.moxin/nexusraven-v2-13b.Q6_K.gguf".to_string()),
             tags: vec!["Less Compressed".to_string(), "Might be slower".to_string()],
             featured: true,
         },
@@ -82,18 +100,22 @@ pub fn get_models() -> Vec<Model> {
 
     let stable_lm_files = vec![
         File {
+            id: "10".to_string(),
             name: "nexusraven-v2-13b.Q4_K_S.gguf".to_string(),
             size: "1.62 GB".to_string(),
             quantization: "Q4_K_S".to_string(),
             downloaded: true,
+            downloaded_path: Some("/home/user/.moxin/nexusraven-v2-13b.Q4_K_S.gguf".to_string()),
             tags: vec!["Small & Fast".to_string()],
             featured: true,
         },
         File {
+            id: "11".to_string(),
             name: "nexusraven-v2-13b.Q6_K.gguf".to_string(),
             size: "2.30 GB".to_string(),
             quantization: "Q6_K".to_string(),
             downloaded: false,
+            downloaded_path: None,
             tags: vec!["Less Compressed".to_string(), "Might be slower".to_string()],
             featured: true,
         },
@@ -101,10 +123,12 @@ pub fn get_models() -> Vec<Model> {
 
     let qwen_files = vec![
         File {
+            id: "12".to_string(),
             name: "qwen1_5-7b-chat-q5_k_m.gguf".to_string(),
             size: "2.30 GB".to_string(),
quantization: "Q5_K_M".to_string(), downloaded: false, + downloaded_path: None, tags: vec!["Less Compressed".to_string(), "Might be slower".to_string()], featured: true, }, diff --git a/moxin-backend/src/lib.rs b/moxin-backend/src/lib.rs index 62fb9f73..a2ade55b 100644 --- a/moxin-backend/src/lib.rs +++ b/moxin-backend/src/lib.rs @@ -1,11 +1,10 @@ mod fake_data; use std::sync::mpsc; -use moxin_protocol::protocol::{Command, Response}; +use moxin_protocol::protocol::Command; pub struct Backend { pub command_sender: mpsc::Sender, - pub response_receiver: mpsc::Receiver, } impl Default for Backend { @@ -17,18 +16,18 @@ impl Default for Backend { impl Backend { pub fn new() -> Backend { let (command_sender, command_receiver) = mpsc::channel(); - let (response_sender, response_receiver) = mpsc::channel(); // The backend thread std::thread::spawn(move || { loop { if let Ok(command) = command_receiver.recv() { match command { - Command::GetFeaturedModels => { + Command::GetFeaturedModels(tx) => { let models = fake_data::get_models(); - response_sender.send(Response::FeaturedModels(models)).unwrap(); + tx.send(Ok(models)).unwrap(); + //tx.send(Err(anyhow!("Database query failed"))).unwrap(); } - Command::SearchModels(query) => { + Command::SearchModels(query, _tx) => { println!("Searching for models with query: {}", query); } _ => {} @@ -37,6 +36,6 @@ impl Backend { } }); - Backend { command_sender, response_receiver } + Backend { command_sender } } } \ No newline at end of file diff --git a/moxin-frontend/Cargo.toml b/moxin-frontend/Cargo.toml index 9e5230b3..5325aafa 100644 --- a/moxin-frontend/Cargo.toml +++ b/moxin-frontend/Cargo.toml @@ -10,4 +10,4 @@ moxin-protocol = { path = "../moxin-protocol" } moxin-backend = { path = "../moxin-backend" } ## makepad-widgets = { git = "https://github.com/makepad/makepad", branch = "rik" } makepad-widgets = { path = "../../makepad/widgets", version = "0.6.0" } -chrono = "0.4" +chrono = "0.4" \ No newline at end of file diff --git a/moxin-frontend/src/data/store.rs b/moxin-frontend/src/data/store.rs index cd1819a3..08160e89 100644 --- a/moxin-frontend/src/data/store.rs +++ b/moxin-frontend/src/data/store.rs @@ -1,7 +1,8 @@ use chrono::Utc; use moxin_protocol::data::{Model, File}; -use moxin_protocol::protocol::{Command, Response}; +use moxin_protocol::protocol::Command; use moxin_backend::Backend; +use std::sync::mpsc::channel; #[derive(Default)] pub struct Store { @@ -20,10 +21,17 @@ impl Store { backend: Backend::default(), }; - store.backend.command_sender.send(Command::GetFeaturedModels).unwrap(); - if let Ok(response) = store.backend.response_receiver.recv() { - if let Response::FeaturedModels(models) = response { - store.models = models; + let (tx, rx) = channel(); + store + .backend + .command_sender + .send(Command::GetFeaturedModels(tx)) + .unwrap(); + + if let Ok(response) = rx.recv() { + match response { + Ok(models) => store.models = models, + Err(err) => eprintln!("Error fetching models: {:?}", err), } }; diff --git a/moxin-protocol/Cargo.toml b/moxin-protocol/Cargo.toml index 1af5ac05..73c7d1db 100644 --- a/moxin-protocol/Cargo.toml +++ b/moxin-protocol/Cargo.toml @@ -6,4 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0" chrono = "0.4" +serde = { version = "1.0", features = ["derive"] } diff --git a/moxin-protocol/src/data.rs b/moxin-protocol/src/data.rs index d40f4376..f7f66e67 100644 --- a/moxin-protocol/src/data.rs +++ 
b/moxin-protocol/src/data.rs @@ -1,11 +1,16 @@ use chrono::NaiveDate; +pub type FileID = String; +pub type ModelID = String; + #[derive(Debug, Clone, Default)] pub struct File { + pub id: FileID, pub name: String, pub size: String, pub quantization: String, pub downloaded: bool, + pub downloaded_path: Option, pub tags: Vec, pub featured: bool, } @@ -17,12 +22,27 @@ pub struct Author { pub description: String, } +#[derive(Clone, Debug)] +pub enum CompatibilityGuess { + PossiblySupported, + NotSupported, +} + +#[derive(Clone, Debug)] +pub struct DownloadedFile { + pub file: File, + pub model: Model, + pub downloaded_at: NaiveDate, + pub compatibility_guess: CompatibilityGuess, + pub information: String, +} + // We're using the HuggingFace identifier as the model ID for now // We should consider using a different identifier in the future if more // models sources are added. #[derive(Debug, Clone, Default)] pub struct Model { - pub id: String, + pub id: ModelID, pub name: String, pub summary: String, pub size: String, diff --git a/moxin-protocol/src/lib.rs b/moxin-protocol/src/lib.rs index 76f395da..6db536ff 100644 --- a/moxin-protocol/src/lib.rs +++ b/moxin-protocol/src/lib.rs @@ -1,2 +1,3 @@ pub mod data; -pub mod protocol; \ No newline at end of file +pub mod protocol; +pub mod open_ai; \ No newline at end of file diff --git a/moxin-protocol/src/open_ai.rs b/moxin-protocol/src/open_ai.rs new file mode 100644 index 00000000..37b8970f --- /dev/null +++ b/moxin-protocol/src/open_ai.rs @@ -0,0 +1,142 @@ +use std::collections::HashMap; +use serde::{Deserialize, Serialize}; +use crate::data::*; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Message { + pub content: String, + pub role: String, + pub name: Option, +} + +// Based on https://platform.openai.com/docs/api-reference/chat/object +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ChatRequestData { + pub messages: Vec, + + // Not really necesary but it is part of the OpenAI API. We are going to send the id + // of the model currently loaded. 
+    pub model: ModelID,
+
+    pub frequency_penalty: Option<f32>,
+    pub logprobs: Option<bool>,
+    pub top_logprobs: Option<u32>,
+    pub max_tokens: Option<u32>,
+    pub presence_penalty: Option<f32>,
+    pub seed: Option<u32>,
+    pub stop: Option<Vec<String>>,
+    pub stream: Option<bool>,
+    pub temperature: Option<f32>,
+    pub top_p: Option<f32>,
+
+    // Adding the following fields since they are part of the OpenAI API,
+    // but they are not likely to be used in the first version of the client
+    pub n: Option<u32>,
+    pub logit_bias: Option<HashMap<String, f32>>,
+}
+
+// Shared structs for ChatResponse and ChatResponseChunk
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct MessageData {
+    pub content: String,
+    pub role: String,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct TopLogProbsItemData {
+    pub token: String,
+    pub logprob: f32,
+    pub bytes: Option<Vec<u8>>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct LogProbsItemData {
+    pub token: String,
+    pub logprob: f32,
+    pub bytes: Option<Vec<u8>>,
+    pub top_logprobs: Vec<TopLogProbsItemData>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct LogProbsData {
+    pub content: Vec<LogProbsItemData>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub enum StopReason {
+    #[serde(rename = "stop")]
+    Stop,
+    #[serde(rename = "length")]
+    Length,
+    #[serde(rename = "content_filter")]
+    ContentFilter
+}
+
+// ChatResponse structs
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ChoiceData {
+    pub finish_reason: StopReason,
+    pub index: u32,
+    pub message: MessageData,
+    pub logprobs: Option<LogProbsData>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct UsageData {
+    pub completion_tokens: u32,
+    pub prompt_tokens: u32,
+    pub total_tokens: u32,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ChatResponseData {
+    pub id: String,
+    pub choices: Vec<ChoiceData>,
+    pub created: u32,
+    pub model: ModelID,
+    pub system_fingerprint: String,
+    pub usage: UsageData,
+
+    #[serde(default = "response_object")]
+    pub object: String,
+}
+
+fn response_object() -> String {
+    "chat.completion".to_string()
+}
+
+// ChatResponseChunk structs
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ChunkChoiceData {
+    pub finish_reason: StopReason,
+    pub index: u32,
+    pub delta: MessageData,
+    pub logprobs: Option<LogProbsData>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ChatResponseChunkData {
+    pub id: String,
+    pub choices: Vec<ChunkChoiceData>,
+    pub created: u32,
+    pub model: ModelID,
+    pub system_fingerprint: String,
+
+    #[serde(default = "response_chunk_object")]
+    pub object: String,
+}
+
+fn response_chunk_object() -> String {
+    "chat.completion.chunk".to_string()
+}
+
+#[derive(Clone, Debug)]
+pub enum ChatResponse {
+    // https://platform.openai.com/docs/api-reference/chat/object
+    ChatFinalResponseData(ChatResponseData),
+    // https://platform.openai.com/docs/api-reference/chat/streaming
+    ChatResponseChunk(ChatResponseChunkData),
+}
\ No newline at end of file
diff --git a/moxin-protocol/src/protocol.rs b/moxin-protocol/src/protocol.rs
index 93ba11a9..7ef988a7 100644
--- a/moxin-protocol/src/protocol.rs
+++ b/moxin-protocol/src/protocol.rs
@@ -1,21 +1,99 @@
-use crate::data::Model;
+use std::sync::mpsc::Sender;
+use anyhow::Result;
+use crate::data::*;
+use crate::open_ai::*;
 
 #[derive(Clone, Debug)]
-pub enum Command {
-    GetFeaturedModels,
+pub enum FileDownloadResponse {
+    Progress(FileID, f32),
+    Completed(DownloadedFile),
+}
 
-    // The argument is a string with the keywords to search for.
-    SearchModels(String),
+#[derive(Clone, Debug)]
+pub enum ContextOverflowPolicy {
+    StopAtLimit,
+    TruncateMiddle,
+    TruncatePastMessages,
+}
 
-    // The argument is the File name.
-    DownloadFile(String),
+#[derive(Clone, Debug)]
+pub enum GPULayers {
+    Specific(u32),
+    Max,
+}
+
+#[derive(Clone, Debug)]
+pub struct LoadModelOptions {
+    pub prompt_template: Option<String>,
+    pub gpu_layers: GPULayers,
+    pub use_mlock: bool,
+    pub n_batch: u32,
+    pub n_ctx: u32,
+    pub rope_freq_scale: f32,
+    pub rope_freq_base: f32,
+
+    // TBD: Not really sure if this is something the backend manages or if it is a matter of
+    // the client (if it is done by tweaking the JSON payload for the chat completion)
+    pub context_overflow_policy: ContextOverflowPolicy
+}
+
+#[derive(Clone, Debug)]
+pub struct LoadedModelInfo {
+    pub file_id: FileID,
+    pub model_id: ModelID,
+
+    // JSON-formatted string with the model information. See "Model Inspector" in LM Studio.
+    pub information: String,
+}
+
+#[derive(Clone, Debug)]
+pub struct ModelResourcesInfo {
+    pub ram_usage: f32,
+    pub cpu_usage: f32,
 }
 
 #[derive(Clone, Debug)]
-pub enum Response {
-    // Response to the GetFeaturedModels command
-    FeaturedModels(Vec<Model>),
+pub enum LoadModelResponse {
+    Progress(FileID, f32),
+    Completed(LoadedModelInfo),
+    ModelResourcesUsage(ModelResourcesInfo),
+}
+
+#[derive(Clone, Debug)]
+pub struct LocalServerConfig {
+    pub port: u16,
+    pub cors: bool,
+    pub request_queuing: bool,
+    pub verbose_server_logs: bool,
+    pub apply_prompt_formatting: bool,
+}
+
+#[derive(Clone, Debug)]
+pub enum LocalServerResponse {
+    Started,
+    Log(String),
+}
+
+#[derive(Clone, Debug)]
+pub enum Command {
+    GetFeaturedModels(Sender<Result<Vec<Model>>>),
+
+    // The argument is a string with the keywords to search for.
+    SearchModels(String, Sender<Result<Vec<Model>>>),
+
+    DownloadFile(FileID, Sender<Result<FileDownloadResponse>>),
+    GetDownloadedFiles(Sender<Result<Vec<DownloadedFile>>>),
+
+    LoadModel(FileID, LoadModelOptions, Sender<Result<LoadModelResponse>>),
+
+    // Eject the currently loaded model, if any
+    EjectModel(Sender<Result<()>>),
+
+    Chat(ChatRequestData, Sender<Result<ChatResponse>>),
+    StopChatCompletion(Sender<Result<()>>),
 
-    // Response to the SearchModels command
-    ModelsSearchResults(Vec<Model>),
+    // Command to start a local server to interact with chat models
+    StartLocalServer(LocalServerConfig, Sender<Result<LocalServerResponse>>),
+    // Command to stop the local server
+    StopLocalServer(Sender<Result<()>>),
 }
\ No newline at end of file
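
For reference, a minimal usage sketch of the channel-per-command protocol introduced above (it mirrors what store.rs does after this change, but it is not part of the diff). Each caller creates its own mpsc channel, embeds the Sender in the Command variant, and waits for a Result on its Receiver, so there is no longer a shared response_receiver to multiplex. Variable names and the printed messages below are illustrative assumptions.

// Illustrative sketch only; assumes the Backend and Command types as modified in this diff.
use std::sync::mpsc::channel;

use moxin_backend::Backend;
use moxin_protocol::protocol::Command;

fn main() {
    // Backend::default() spawns the backend thread and keeps only the command sender.
    let backend = Backend::default();

    // Every command carries its own response channel, so each caller decides
    // whether to block on recv() or poll with try_recv().
    let (tx, rx) = channel();
    backend
        .command_sender
        .send(Command::GetFeaturedModels(tx))
        .unwrap();

    match rx.recv() {
        Ok(Ok(models)) => println!("Fetched {} featured models", models.len()),
        Ok(Err(err)) => eprintln!("Backend error: {:?}", err),
        Err(err) => eprintln!("Backend thread disconnected: {:?}", err),
    }
}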