# Lead-in phrases that models tend to put in front of the actual alt text.
_ALT_TEXT_PREFIXES = (
    "Alt text:",
    "Here is",
    "The alt text is",
    "I suggest",
    "Based on the image,",
    "A concise alt text would be",
    "Here's a descriptive alt text:",
    "The appropriate alt text is",
)

# Quotes, punctuation and spaces trimmed from both ends of the alt text.
_WRAPPING_CHARS = "\"'.,: "

_UNEXPECTED_STRUCTURE = "Error: Unexpected response structure from Ollama"


def _strip_lead_ins(text):
    """Remove known lead-in phrases from the start of *text*.

    Matching is case-insensitive and repeats until no phrase matches, so
    stacked lead-ins ("Based on the image, here is ...") are all removed
    regardless of the order in which the phrases are listed.
    """
    matched = True
    while matched:
        matched = False
        for prefix in _ALT_TEXT_PREFIXES:
            # Compare a slice of the original text so that the slice offsets
            # stay valid even if lower-casing changes the string length.
            if text[:len(prefix)].lower() == prefix.lower():
                text = text[len(prefix):].strip()
                matched = True
                break
    return text


def _clean_alt_text(raw_text):
    """Reduce a raw model reply to one sentence-formatted alt text line."""
    fallback = ""
    chosen = ""
    for line in raw_text.splitlines():
        body = _strip_lead_ins(line.strip())
        # A line that still ends with a colon only introduces what follows
        # ("Here is the alt text:"); prefer a later line over it.
        introduces_next_line = body.endswith(":")
        body = body.strip(_WRAPPING_CHARS)
        if not body:
            continue
        if not fallback:
            fallback = body
        if not introduces_next_line:
            chosen = body
            break

    # If every non-empty line looked like a lead-in, keep the first one
    # rather than discarding the reply.
    text = chosen or fallback
    if not text:
        return ""

    text = text[0].upper() + text[1:]
    # Avoid doubled terminal punctuation such as "?." or "!.".
    if text[-1] not in "!?":
        text += "."
    return text


def generate_with_ollama(prompt, model_name="llama3.1:latest"):
    """Generate alt text using a hosted Ollama model.

    The prompt is wrapped in instructions asking for the bare alt text only,
    sent as a non-streaming request, and the reply is cleaned up: known
    lead-in phrases, surrounding quotes/punctuation and any explanatory
    extra lines are removed, and the result is formatted as one sentence.

    Args:
        prompt: Image details to describe.
        model_name: Name of the Ollama model to query.

    Returns:
        The cleaned alt text (possibly an empty string if the model returned
        nothing usable), or a message starting with "Error" if the request
        failed or the response could not be interpreted. This function does
        not raise; all failures are logged and reported via the return value.
    """
    formatted_prompt = (
        "Generate alt text for an image. Respond ONLY with the text that should go inside "
        "the alt attribute of an img tag. Do not include 'Alt text:', explanations, quotes, "
        "or any other text. Keep the description concise and factual.\n\n"
        f"Image details: {prompt}"
    )

    payload = {
        "model": model_name,
        "prompt": formatted_prompt,
        # Ask for a single JSON object instead of a stream of chunks.
        "stream": False,
        "system": "You are a helpful assistant that generates alt text for images. Respond only with the alt text itself, without any explanations, disclaimers, or meta-commentary.",
    }

    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=30)
        response.raise_for_status()

        # Keep this try narrow so that only a genuine decode failure is
        # reported as malformed JSON.
        try:
            response_data = response.json()
        except ValueError as val_err:
            logging.error("JSON decode error with Ollama API: %s", val_err)
            return "Error: Malformed JSON from Ollama API"

        if not isinstance(response_data, dict):
            return _UNEXPECTED_STRUCTURE

        if "response" in response_data:
            raw_text = response_data["response"]
        elif "text" in response_data:
            raw_text = response_data["text"]
        else:
            return _UNEXPECTED_STRUCTURE

        if not isinstance(raw_text, str):
            return _UNEXPECTED_STRUCTURE

        return _clean_alt_text(raw_text)

    except requests.exceptions.HTTPError as http_err:
        logging.error("Ollama API HTTP error: %s", http_err)
        return f"Error using Ollama API: {http_err}"
    except Exception as e:
        # Boundary handler: callers expect an error string, never an exception.
        logging.error("Error using Ollama API: %s", e)
        return f"Error generating text with Ollama API: {e}"