From 1bf7df11fcc4d821446d3cf487db23bbfc10fa05 Mon Sep 17 00:00:00 2001 From: Chris Bruner Date: Sat, 22 Feb 2025 18:12:52 -0500 Subject: [PATCH 1/3] Now gets the list of models from Ollama instead of hardcoding them. --- src/utils/utils.py | 77 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/src/utils/utils.py b/src/utils/utils.py index b604812b..9b9c75d4 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -2,8 +2,9 @@ import os import time from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Optional, List import requests +from ollama import ListResponse, list from langchain_anthropic import ChatAnthropic from langchain_mistralai import ChatMistralAI @@ -24,10 +25,38 @@ "moonshot": "MoonShot" } +def get_ollama_models() -> List[str]: + """ + Fetch available models from Ollama server + Returns a list of model names or empty list if Ollama is not available + """ + try: + response: ListResponse = list() + return [model.model for model in response.models] + except Exception as e: + print(f"Warning: Could not fetch Ollama models: {e}") + return [] + +def update_model_names(): + """ + Update the model_names dictionary with current Ollama models + """ + global model_names + ollama_models = get_ollama_models() + + # Create a copy of the original dictionary + updated_models = model_names.copy() + + # Update Ollama models if we successfully fetched them + if ollama_models: + updated_models["ollama"] = ollama_models + + return updated_models + def get_llm_model(provider: str, **kwargs): """ - 获取LLM 模型 - :param provider: 模型类型 + Get LLM model + :param provider: model type :param kwargs: :return: """ @@ -150,7 +179,6 @@ def get_llm_model(provider: str, **kwargs): base_url=base_url, api_key=api_key, ) - elif provider == "moonshot": return ChatOpenAI( model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"), @@ -167,14 +195,17 @@ def get_llm_model(provider: str, **kwargs): "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"], "deepseek": ["deepseek-chat", "deepseek-reasoner"], "google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05"], - "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b", "deepseek-r1:14b", "deepseek-r1:32b"], "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"], "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"], "alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"], "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"], + # Ollama models will be populated dynamically + "ollama": [] } -# Callback to update the model name dropdown based on the selected provider +# Update model_names with current Ollama models +model_names = update_model_names() + def update_model_dropdown(llm_provider, api_key=None, base_url=None): """ Update the model name dropdown with predefined models for the selected provider.
@@ -185,11 +216,37 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None): if not base_url: base_url = os.getenv(f"{llm_provider.upper()}_BASE_URL", "") - # Use predefined models for the selected provider + if llm_provider == "ollama": + # Refresh Ollama models list when provider is selected + current_models = get_ollama_models() + if current_models: + return gr.Dropdown( + choices=current_models, + value=current_models[0] if current_models else "", + interactive=True + ) + else: + return gr.Dropdown( + choices=[], + value="", + interactive=True, + allow_custom_value=True + ) + + # Use predefined models for other providers if llm_provider in model_names: - return gr.Dropdown(choices=model_names[llm_provider], value=model_names[llm_provider][0], interactive=True) + return gr.Dropdown( + choices=model_names[llm_provider], + value=model_names[llm_provider][0], + interactive=True + ) else: - return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True) + return gr.Dropdown( + choices=[], + value="", + interactive=True, + allow_custom_value=True + ) def handle_api_key_error(provider: str, env_var: str): """ @@ -208,7 +265,6 @@ def encode_image(img_path): image_data = base64.b64encode(fin.read()).decode("utf-8") return image_data - def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Dict[str, Optional[str]]: """Get the latest recording and trace files""" latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types} @@ -229,6 +285,7 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di print(f"Error getting latest {file_type} file: {e}") return latest_files + async def capture_screenshot(browser_context): """Capture and encode a screenshot""" # Extract the Playwright browser instance From 77ce73df2a78107b96576d8889ae265069b5ada8 Mon Sep 17 00:00:00 2001 From: Chris Bruner Date: Mon, 24 Feb 2025 01:56:40 -0500 Subject: [PATCH 2/3] Improve browser navigation and error handling - Add isolation of navigation actions to ensure page loads completely before further actions - Add browser context validation to catch initialization errors early - Add state validation to ensure browser state is available before proceeding The navigation isolation particularly helps with Ollama models where sequential actions might be attempted before page load completes. Browser context and state validation provides earlier, clearer error messages for common failure cases. 
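As an illustration of the isolation rule (the helper name below is hypothetical; the real change lives in CustomAgent.get_next_action in the diff that follows):

    def isolate_navigation(actions: list) -> list:
        # If the first planned action navigates, run it alone so the remaining
        # actions are re-planned against the fully loaded page on the next step.
        if actions and hasattr(actions[0], "go_to_url"):
            return actions[:1]
        return actions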
--- src/agent/custom_agent.py | 11 +++++++++++ src/agent/custom_views.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index bfeb33ca..61c4ac5f 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -245,6 +245,14 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu if parsed is None: logger.debug(ai_message.content) raise ValueError('Could not parse response.') + + if len(parsed.action) > 0: + first_action = parsed.action[0] + if hasattr(first_action, 'go_to_url'): + logger.info("Navigation action detected - executing in isolation") + parsed.action = [first_action] + elif hasattr(first_action, 'done'): + parsed.action = [first_action] # Limit actions to maximum allowed per step parsed.action = parsed.action[: self.max_actions_per_step] @@ -310,6 +318,9 @@ async def _run_planner(self) -> Optional[str]: @time_execution_async("--step") async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: """Execute one step of the task""" + if not self.browser_context: + raise RuntimeError("Browser context not initialized") + logger.info(f"\n📍 Step {self.n_steps}") state = None model_output = None diff --git a/src/agent/custom_views.py b/src/agent/custom_views.py index d0dfb061..7496a471 100644 --- a/src/agent/custom_views.py +++ b/src/agent/custom_views.py @@ -25,7 +25,7 @@ class CustomAgentBrain(BaseModel): task_progress: str future_plans: str thought: str - summary: str + summary: str="" class CustomAgentOutput(AgentOutput): From b056cd23654a7696eb8dbb85c0320ca5f0eef7a3 Mon Sep 17 00:00:00 2001 From: Chris Bruner Date: Tue, 25 Feb 2025 10:43:43 -0500 Subject: [PATCH 3/3] Updated CustomAgent to be more robust Modified the llm_num_ctx default to be more appropriate for various models. Updated llm.py so the model classes are consolidated. Fixed some errors in utils.py. --- src/agent/custom_agent.py | 95 ++++++++-- src/utils/default_config_settings.py | 2 +- src/utils/llm.py | 264 +++++++++++++++++++++------ src/utils/utils.py | 61 ++++--- webui.py | 4 +- 5 files changed, 318 insertions(+), 108 deletions(-) diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index 61c4ac5f..d611621a 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -220,6 +220,7 @@ def update_step_info( logger.info(f"🧠 All Memory: \n{step_info.memory}") + @time_execution_async("--get_next_action") async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput: """Get next action from LLM based on current state""" @@ -232,20 +233,73 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu logger.info(ai_message.reasoning_content) logger.info("🤯 End Deep Thinking") - if isinstance(ai_message.content, list): - ai_content = ai_message.content[0] - else: - ai_content = ai_message.content - - ai_content = ai_content.replace("```json", "").replace("```", "") - ai_content = repair_json(ai_content) - parsed_json = json.loads(ai_content) - parsed: AgentOutput = self.AgentOutput(**parsed_json) - - if parsed is None: - logger.debug(ai_message.content) - raise ValueError('Could not parse response.') + try: + if isinstance(ai_message.content, list): + ai_content = ai_message.content[0] + else: + ai_content = ai_message.content + + # Add this debug print + print("RAW AI CONTENT:", ai_content) + + # Enhanced JSON parsing + if "```json" in ai_content or "```" in ai_content: + # Extract JSON from code blocks + ai_content =
re.sub(r'```(?:json)?(.*?)```', r'\1', ai_content, flags=re.DOTALL) + + # Try to repair the JSON + try: + ai_content = repair_json(ai_content) + except Exception as json_repair_error: + logger.warning(f"JSON repair failed: {json_repair_error}") + # Try more aggressive cleaning + ai_content = re.sub(r'[^{}[\],:"\d\w\s.-]', '', ai_content) + + try: + parsed_json = json.loads(ai_content) + if 'action' in parsed_json: + for action in parsed_json['action']: + if isinstance(action, dict) and 'done' in action and isinstance(action['done'], dict) and 'text' in action['done']: + # If text is a dict with type/value structure, extract the value + if isinstance(action['done']['text'], dict) and 'value' in action['done']['text']: + action['done']['text'] = action['done']['text']['value'] + # If text is any other non-string dict, convert to string + elif isinstance(action['done']['text'], dict): + action['done']['text'] = str(action['done']['text']) + parsed: AgentOutput = self.AgentOutput(**parsed_json) + except json.JSONDecodeError as e: + # Create a minimal valid structure if parsing fails + logger.warning("JSON parsing failed, creating minimal structure") + parsed_json = { + "current_state": { + "prev_action_evaluation": "Failed - Unable to parse model output", + "important_contents": "", + "task_progress": "", + "future_plans": "Retry with simpler action", + "thought": "The model output was malformed. I need to retry with a simpler action.", + "summary": "Retrying with simpler action" + }, + "action": [{"extract_page_content": {}}] # Safe fallback action + } + + parsed: AgentOutput = self.AgentOutput(**parsed_json) + except Exception as e: + logger.error(f"Error processing model output: {e}") + # Create a minimal fallback output + minimal_json = { + "current_state": { + "prev_action_evaluation": "Failed - Unable to process model output", + "important_contents": "", + "task_progress": "", + "future_plans": "Retry with simpler action", + "thought": "There was an error processing the model output. I'll take a safe action.", + "summary": "Handling error gracefully" + }, + "action": [{"extract_page_content": {}}] # Safe fallback action + } + parsed = self.AgentOutput(**minimal_json) + # Continue with existing code... if len(parsed.action) > 0: first_action = parsed.action[0] if hasattr(first_action, 'go_to_url'): @@ -363,14 +417,17 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: check_break_if_paused=lambda: self._check_if_stopped_or_paused(), available_file_paths=self.available_file_paths, ) - if len(result) != len(actions): - # I think something changes, such information should let LLM know + if len(result) != len(actions) and len(actions) > 0: + # Add safety check for result list + base_action_index = len(result) - 1 if len(result) > 0 else 0 for ri in range(len(result), len(actions)): + error_msg = f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute." + if len(result) > 0: + error_msg += f" Something new appeared after action {actions[base_action_index].model_dump_json(exclude_unset=True)}" result.append(ActionResult(extracted_content=None, - include_in_memory=True, - error=f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute. 
\ - Something new appeared after action {actions[len(result) - 1].model_dump_json(exclude_unset=True)}", - is_done=False)) + include_in_memory=True, + error=error_msg, + is_done=False)) for ret_ in result: if ret_.extracted_content and "Extracted page" in ret_.extracted_content: # record every extracted page diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py index e6fa88f9..5ddc27b1 100644 --- a/src/utils/default_config_settings.py +++ b/src/utils/default_config_settings.py @@ -14,7 +14,7 @@ def default_config(): "tool_calling_method": "auto", "llm_provider": "openai", "llm_model_name": "gpt-4o", - "llm_num_ctx": 32000, + "llm_num_ctx": 16000, "llm_temperature": 1.0, "llm_base_url": "", "llm_api_key": "", diff --git a/src/utils/llm.py b/src/utils/llm.py index 2ea332e1..34519881 100644 --- a/src/utils/llm.py +++ b/src/utils/llm.py @@ -1,5 +1,6 @@ from openai import OpenAI -import pdb +import logging +import traceback from langchain_openai import ChatOpenAI from langchain_core.globals import get_llm_cache from langchain_core.language_models.base import ( @@ -40,14 +41,87 @@ cast, ) -class DeepSeekR1ChatOpenAI(ChatOpenAI): +logger = logging.getLogger(__name__) + +class ModelResponseProcessor: + """Utility class for extracting and processing model responses.""" + + @staticmethod + def extract_reasoning_content(content: str) -> tuple[str, str]: + """Extract reasoning content from various formats.""" + reasoning_content = "" + processed_content = content + + # Try different formats + if "<think>" in content and "</think>" in content: + # DeepSeek format with <think> tags + parts = content.split("</think>", 1) + reasoning_content = parts[0].replace("<think>", "").strip() + processed_content = parts[1].strip() if len(parts) > 1 else content + elif "Reasoning:" in content and "Action:" in content: + # Format with explicit Reasoning/Action sections + parts = content.split("Action:", 1) + reasoning_content = parts[0].replace("Reasoning:", "").strip() + processed_content = parts[1].strip() if len(parts) > 1 else content + + return reasoning_content, processed_content + + @staticmethod + def extract_json_content(content: str) -> str: + """Extract JSON content from various formats.""" + processed_content = content + + # Try JSON code blocks + if "```json" in content and "```" in content: + try: + json_parts = content.split("```json", 1) + if len(json_parts) > 1: + code_parts = json_parts[1].split("```", 1) + if code_parts: + processed_content = code_parts[0].strip() + except Exception: + pass + + # Try JSON Response marker + elif "**JSON Response:**" in content: + try: + json_parts = content.split("**JSON Response:**", 1) + if len(json_parts) > 1: + processed_content = json_parts[1].strip() + except Exception: + pass + + return processed_content - def __init__(self, *args: Any, **kwargs: Any) -> None: + @staticmethod + def process_response(response: AIMessage) -> AIMessage: + """Process a response to extract reasoning and content.""" + try: + if not hasattr(response, "content") or not response.content: + return AIMessage(content="", reasoning_content="") + + content = response.content + + # Extract reasoning content + reasoning_content, processed_content = ModelResponseProcessor.extract_reasoning_content(content) + + # Extract JSON content if present + processed_content = ModelResponseProcessor.extract_json_content(processed_content) + + return AIMessage(content=processed_content, reasoning_content=reasoning_content) + + except Exception as e: + logger.error(f"Error processing response: {e}") + # Return
original message if processing fails + return response + + +class EnhancedChatOpenAI(ChatOpenAI): + """Enhanced ChatOpenAI that handles reasoning extraction.""" + + def __init__(self, *args, **kwargs): + self.extract_reasoning = kwargs.pop("extract_reasoning", False) super().__init__(*args, **kwargs) - self.client = OpenAI( - base_url=kwargs.get("base_url"), - api_key=kwargs.get("api_key") - ) async def ainvoke( self, @@ -57,24 +131,18 @@ async def ainvoke( stop: Optional[list[str]] = None, **kwargs: Any, ) -> AIMessage: - message_history = [] - for input_ in input: - if isinstance(input_, SystemMessage): - message_history.append({"role": "system", "content": input_.content}) - elif isinstance(input_, AIMessage): - message_history.append({"role": "assistant", "content": input_.content}) - else: - message_history.append({"role": "user", "content": input_.content}) - - response = self.client.chat.completions.create( - model=self.model_name, - messages=message_history - ) - - reasoning_content = response.choices[0].message.reasoning_content - content = response.choices[0].message.content - return AIMessage(content=content, reasoning_content=reasoning_content) - + try: + response = await super().ainvoke(input=input, config=config, stop=stop, **kwargs) + + if self.extract_reasoning: + return ModelResponseProcessor.process_response(response) + return response + + except Exception as e: + logger.error(f"Error in EnhancedChatOpenAI.ainvoke: {e}") + # Return a minimal AIMessage + return AIMessage(content=f"Error: {str(e)}") + def invoke( self, input: LanguageModelInput, @@ -83,26 +151,29 @@ def invoke( stop: Optional[list[str]] = None, **kwargs: Any, ) -> AIMessage: - message_history = [] - for input_ in input: - if isinstance(input_, SystemMessage): - message_history.append({"role": "system", "content": input_.content}) - elif isinstance(input_, AIMessage): - message_history.append({"role": "assistant", "content": input_.content}) - else: - message_history.append({"role": "user", "content": input_.content}) - - response = self.client.chat.completions.create( - model=self.model_name, - messages=message_history - ) + try: + response = super().invoke(input=input, config=config, stop=stop, **kwargs) + + if self.extract_reasoning: + return ModelResponseProcessor.process_response(response) + return response + + except Exception as e: + logger.error(f"Error in EnhancedChatOpenAI.invoke: {e}") + # Return a minimal AIMessage + return AIMessage(content=f"Error: {str(e)}") + - reasoning_content = response.choices[0].message.reasoning_content - content = response.choices[0].message.content - return AIMessage(content=content, reasoning_content=reasoning_content) +class EnhancedChatOllama(ChatOllama): + """Enhanced ChatOllama that handles reasoning extraction.""" -class DeepSeekR1ChatOllama(ChatOllama): - + extract_reasoning: bool = True + + def __init__(self, *args, **kwargs): + if "extract_reasoning" in kwargs: + self.extract_reasoning = kwargs.pop("extract_reasoning") + super().__init__(*args, **kwargs) + async def ainvoke( self, input: LanguageModelInput, @@ -111,14 +182,18 @@ async def ainvoke( stop: Optional[list[str]] = None, **kwargs: Any, ) -> AIMessage: - org_ai_message = await super().ainvoke(input=input) - org_content = org_ai_message.content - reasoning_content = org_content.split("")[0].replace("", "") - content = org_content.split("")[1] - if "**JSON Response:**" in content: - content = content.split("**JSON Response:**")[-1] - return AIMessage(content=content, 
reasoning_content=reasoning_content) - + try: + response = await super().ainvoke(input=input, config=config, stop=stop, **kwargs) + + if self.extract_reasoning: + return ModelResponseProcessor.process_response(response) + return response + + except Exception as e: + logger.error(f"Error in EnhancedChatOllama.ainvoke: {e}\n{traceback.format_exc()}") + # Return a minimal AIMessage + return AIMessage(content=f"Error: {str(e)}") + def invoke( self, input: LanguageModelInput, @@ -127,10 +202,83 @@ def invoke( stop: Optional[list[str]] = None, **kwargs: Any, ) -> AIMessage: - org_ai_message = super().invoke(input=input) - org_content = org_ai_message.content - reasoning_content = org_content.split("")[0].replace("", "") - content = org_content.split("")[1] - if "**JSON Response:**" in content: - content = content.split("**JSON Response:**")[-1] - return AIMessage(content=content, reasoning_content=reasoning_content) \ No newline at end of file + try: + # Try special API for compatible models + if hasattr(self, "client") and hasattr(self.client, "chat") and \ + any(name in self.model_name for name in ["deepseek-r1", "command-r"]): + try: + message_history = [] + for input_ in input: + if isinstance(input_, SystemMessage): + message_history.append({"role": "system", "content": input_.content}) + elif isinstance(input_, AIMessage): + message_history.append({"role": "assistant", "content": input_.content}) + else: + message_history.append({"role": "user", "content": input_.content}) + + api_response = self.client.chat.completions.create( + model=self.model_name, + messages=message_history + ) + + content = getattr(api_response.choices[0].message, "content", "") + reasoning_content = getattr(api_response.choices[0].message, "reasoning_content", "") + + if content and reasoning_content: + return AIMessage(content=content, reasoning_content=reasoning_content) + except Exception as api_err: + logger.warning(f"Special API approach failed, falling back: {api_err}") + + # Standard approach + response = super().invoke(input=input, config=config, stop=stop, **kwargs) + + if self.extract_reasoning: + return ModelResponseProcessor.process_response(response) + return response + + except Exception as e: + logger.error(f"Error in EnhancedChatOllama.invoke: {e}\n{traceback.format_exc()}") + # Return a minimal AIMessage + return AIMessage(content=f"Error: {str(e)}") + + +class DeepSeekR1ChatOpenAI(EnhancedChatOpenAI): + """Specialized class for DeepSeek models via OpenAI compatible API.""" + + def __init__(self, *args, **kwargs): + super().__init__(extract_reasoning=True, *args, **kwargs) + self.client = OpenAI( + base_url=kwargs.get("base_url"), + api_key=kwargs.get("api_key") + ) + + async def ainvoke( + self, + input: LanguageModelInput, + config: Optional[RunnableConfig] = None, + *, + stop: Optional[list[str]] = None, + **kwargs: Any, + ) -> AIMessage: + try: + message_history = [] + for input_ in input: + if isinstance(input_, SystemMessage): + message_history.append({"role": "system", "content": input_.content}) + elif isinstance(input_, AIMessage): + message_history.append({"role": "assistant", "content": input_.content}) + else: + message_history.append({"role": "user", "content": input_.content}) + + response = self.client.chat.completions.create( + model=self.model_name, + messages=message_history + ) + + reasoning_content = getattr(response.choices[0].message, "reasoning_content", "") + content = getattr(response.choices[0].message, "content", "") + + return AIMessage(content=content, 
reasoning_content=reasoning_content) + except Exception as e: + logger.error(f"Error in DeepSeekR1ChatOpenAI.ainvoke: {e}\n{traceback.format_exc()}") + return AIMessage(content=f"Error processing DeepSeek model: {str(e)}") \ No newline at end of file diff --git a/src/utils/utils.py b/src/utils/utils.py index 9b9c75d4..f4a2ef88 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Dict, Optional, List import requests -from ollama import ListResponse, list +from ollama import ListResponse, list,Client from langchain_anthropic import ChatAnthropic from langchain_mistralai import ChatMistralAI @@ -12,8 +12,8 @@ from langchain_ollama import ChatOllama from langchain_openai import AzureChatOpenAI, ChatOpenAI import gradio as gr +from src.utils.llm import EnhancedChatOllama, EnhancedChatOpenAI, DeepSeekR1ChatOpenAI -from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama PROVIDER_DISPLAY_NAMES = { "openai": "OpenAI", @@ -101,7 +101,7 @@ def get_llm_model(provider: str, **kwargs): else: base_url = kwargs.get("base_url") - return ChatOpenAI( + return EnhancedChatOpenAI( model=kwargs.get("model_name", "gpt-4o"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, @@ -121,11 +121,12 @@ def get_llm_model(provider: str, **kwargs): api_key=api_key, ) else: - return ChatOpenAI( + return EnhancedChatOpenAI( model=kwargs.get("model_name", "deepseek-chat"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key, + extract_reasoning=True, ) elif provider == "google": return ChatGoogleGenerativeAI( @@ -138,22 +139,18 @@ def get_llm_model(provider: str, **kwargs): base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434") else: base_url = kwargs.get("base_url") - - if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"): - return DeepSeekR1ChatOllama( - model=kwargs.get("model_name", "deepseek-r1:14b"), - temperature=kwargs.get("temperature", 0.0), - num_ctx=kwargs.get("num_ctx", 32000), - base_url=base_url, - ) - else: - return ChatOllama( - model=kwargs.get("model_name", "qwen2.5:7b"), - temperature=kwargs.get("temperature", 0.0), - num_ctx=kwargs.get("num_ctx", 32000), - num_predict=kwargs.get("num_predict", 1024), - base_url=base_url, - ) + + model_name = kwargs.get("model_name", "qwen2.5:7b") + + # Use the enhanced ChatOllama for all Ollama models + return EnhancedChatOllama( + model=model_name, + temperature=kwargs.get("temperature", 0.0), + num_ctx=min(kwargs.get("num_ctx", 16000), 32000), + num_predict=kwargs.get("num_predict", 1024), + base_url=base_url, + stop=["<|im_end|>", ""] + ) elif provider == "azure_openai": if not kwargs.get("base_url", ""): base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "") @@ -173,14 +170,14 @@ def get_llm_model(provider: str, **kwargs): else: base_url = kwargs.get("base_url") - return ChatOpenAI( + return EnhancedChatOpenAI( model=kwargs.get("model_name", "qwen-plus"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key, ) elif provider == "moonshot": - return ChatOpenAI( + return EnhancedChatOpenAI( model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"), temperature=kwargs.get("temperature", 0.0), base_url=os.getenv("MOONSHOT_ENDPOINT"), @@ -188,7 +185,7 @@ def get_llm_model(provider: str, **kwargs): ) else: raise ValueError(f"Unsupported provider: {provider}") - + # Predefined model names for common providers model_names = { "anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"], 
@@ -275,12 +272,20 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di for file_type in file_types: try: - matches = list(Path(directory).rglob(f"*{file_type}")) - if matches: - latest = max(matches, key=lambda p: p.stat().st_mtime) + # Use os.walk instead of Path.rglob + matching_files = [] + for root, _, files in os.walk(directory): + for filename in files: + if filename.endswith(file_type): + full_path = os.path.join(root, filename) + matching_files.append(full_path) + + if matching_files: + # Find latest file by modification time + latest = max(matching_files, key=lambda p: os.path.getmtime(p)) # Only return files that are complete (not being written) - if time.time() - latest.stat().st_mtime > 1.0: - latest_files[file_type] = str(latest) + if time.time() - os.path.getmtime(latest) > 1.0: + latest_files[file_type] = latest except Exception as e: print(f"Error getting latest {file_type} file: {e}") diff --git a/webui.py b/webui.py index e770d99d..6929c449 100644 --- a/webui.py +++ b/webui.py @@ -752,8 +752,8 @@ def create_ui(config, theme_name="Ocean"): interactive=True, allow_custom_value=True, # Allow users to input custom model names choices=["auto", "json_schema", "function_calling"], - info="Tool Calls Funtion Name", - visible=False + info="Tool Calling Method for the LLM", + visible=True ) with gr.TabItem("🔧 LLM Configuration", id=2):
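Usage sketch (not part of the patches above): how the dynamic Ollama model listing from PATCH 1/3 is expected to behave, assuming a local Ollama server is running and that the import path src.utils.utils matches the file paths shown in the diffs:

    from src.utils import utils  # assumed import path, taken from the diff file paths

    # Returns e.g. ["qwen2.5:7b", "deepseek-r1:14b"]; returns [] if the Ollama server is unreachable.
    models = utils.get_ollama_models()
    print(models)

    # Selecting the "ollama" provider re-queries the server and rebuilds the dropdown choices;
    # other providers keep their predefined lists from model_names.
    ollama_dropdown = utils.update_model_dropdown("ollama")
    openai_dropdown = utils.update_model_dropdown("openai")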