refactor: update LLM vulnerability report generation and storage
- Replaced the get_vulnerability_llm_report function with llm_vulnerability_report for generating and storing vulnerability reports using an LLM.
- Enhanced the LLM vulnerability report generation process by splitting it into distinct sections: technical description, business impact, remediation steps, and references.
- Updated the data model to store references as text fields instead of using a separate VulnerabilityReference model.
- Improved the HTML rendering of vulnerability descriptions, impacts, and remediations by converting markdown to HTML with proper styling.
- Refactored the LLM response handling to use a dictionary format for easier manipulation and storage (see the sketch below).
- Removed redundant code and streamlined the process of updating vulnerabilities with LLM-generated data.
- Adjusted the configuration and prompts for the LLM to support more detailed and structured report generation.
psyray committed Nov 12, 2024
1 parent 916ed10 commit 7e87530
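
A minimal sketch of the dictionary-based flow described in the bullets above; the helper name, key names, and sample values are illustrative assumptions, not the exact schema used by llm_vulnerability_report.

# Hypothetical sketch of collecting the four generated sections for storage.
# Key names, the helper, and the sample values are illustrative only.

def build_report_dict(technical, impact, remediation, references):
    """Collect the four generated sections into one dictionary for storage."""
    return {
        'description': technical,      # technical description section
        'impact': impact,              # business impact section
        'remediation': remediation,    # "- " prefixed remediation steps
        # References are kept as plain text (one URL per line) now that a
        # separate VulnerabilityReference model is no longer used.
        'references': '\n'.join(references),
    }

report = build_report_dict(
    technical='Reflected XSS in the q parameter of /search ...',
    impact='An attacker can run arbitrary JavaScript in a victim browser ...',
    remediation='- Encode all user-controlled output\n- Deploy a strict CSP',
    references=['https://owasp.org/www-community/attacks/xss/'],
)
print(report['references'])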
Showing 13 changed files with 441 additions and 326 deletions.
43 changes: 6 additions & 37 deletions web/api/views.py
@@ -6,7 +6,6 @@
from ipaddress import IPv4Network
from collections import defaultdict
from datetime import datetime
from markdown import markdown

import requests
import validators
@@ -52,7 +51,7 @@
)
from reNgine.tasks import (
create_scan_activity,
llm_vulnerability_description,
llm_vulnerability_report,
initiate_subscan,
query_ip_history,
query_reverse_whois,
@@ -64,6 +63,7 @@
send_hackerone_report
)
from reNgine.llm.llm import LLMAttackSuggestionGenerator
from reNgine.llm.utils import convert_markdown_to_html
from reNgine.utilities import is_safe_path, remove_lead_and_trail_slash
from scanEngine.models import EngineType, InstalledExternalTool
from startScan.models import (
@@ -183,37 +183,6 @@ def put(self, request):
return Response({'status': False, 'message': 'An error occurred while updating Ollama settings.'}, status=500)

class LLMAttackSuggestion(APIView):
def _convert_markdown_to_html(self, markdown_text):
# Extract LLM badge if present (at the beginning of the text)
llm_badge = ""
if markdown_text.startswith('[LLM:'):
llm_name = markdown_text[5:markdown_text.index(']')]
llm_badge = f'<span class="badge bg-soft-primary text-primary mb-3">Generated by {llm_name}</span><br>'
markdown_text = markdown_text[markdown_text.index(']')+1:].strip()

# Configure Markdown with specific options
html_content = markdown(markdown_text,
extensions=[
'fenced_code',
'tables',
'nl2br',
'sane_lists', # Better list handling
'def_list', # Definition lists support
],
)

# Add Bootstrap classes and clean up formatting
html_content = (html_content
.replace('<pre><code>', '<pre class="bg-light p-3 rounded"><code class="text-danger">')
.replace('<ul>', '<ul class="list-unstyled">')
.replace('<ol>', '<ul class="list-unstyled">') # Convert ordered lists to unordered
.replace('</ol>', '</ul>')
.replace('\n\n', '<br>')
.replace('\n', '')
)

return llm_badge + html_content

def get(self, request):
req = request
subdomain_id = safe_int_cast(req.query_params.get('subdomain_id'))
@@ -237,7 +206,7 @@ def get(self, request):

# Return cached result only if not forcing regeneration
if subdomain.attack_surface and not force_regenerate:
sanitized_html = self._convert_markdown_to_html(subdomain.attack_surface)
sanitized_html = subdomain.formatted_attack_surface
return Response({
'status': True,
'subdomain_name': subdomain.name,
@@ -279,7 +248,7 @@ def get(self, request):
subdomain.attack_surface = markdown_content
subdomain.save()

response['description'] = self._convert_markdown_to_html(markdown_content)
response['description'] = convert_markdown_to_html(markdown_content)

return Response(response)

Check warning (Code scanning / CodeQL): Information exposure through an exception (Medium). Stack trace information flows to this location and may be exposed to an external user.
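
A generic pattern for this class of finding, shown here only as a hedged sketch (it is not part of this commit): log the full exception server-side and return a sanitized message to the client.

import logging

from rest_framework.response import Response

logger = logging.getLogger(__name__)

def safe_llm_response(generate):
    """Return generate() as an API response without leaking internals.

    generate is any callable producing the response payload; this wrapper is
    a generic sketch, not the handler used in reNgine.
    """
    try:
        return Response(generate())
    except Exception:
        # Keep the stack trace in server logs only; clients get a generic message.
        logger.exception('LLM attack suggestion generation failed')
        return Response(
            {'status': False, 'error': 'An internal error occurred.'},
            status=500,
        )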

@@ -320,7 +289,7 @@ def get(self, request):
'status': False,
'error': 'Missing GET param Vulnerability `id`'
})
task = llm_vulnerability_description.apply_async(args=(vulnerability_id,))
task = llm_vulnerability_report.apply_async(args=(vulnerability_id,))
response = task.wait()
return Response(response)

@@ -2792,7 +2761,7 @@ def general_lookup(self, search_value):
Q(severity__icontains=search_value) |
Q(description__icontains=search_value) |
Q(extracted_results__icontains=search_value) |
Q(references__url__icontains=search_value) |
Q(references__icontains=search_value) |
Q(cve_ids__name__icontains=search_value) |
Q(cwe_ids__name__icontains=search_value) |
Q(cvss_metrics__icontains=search_value) |
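
The references__icontains lookup above reflects the move from a related VulnerabilityReference model to a plain text field. A minimal sketch of what such a model change could look like; the field set is an assumption, not copied from the actual migration.

from django.db import models

class Vulnerability(models.Model):
    # Before this commit: references pointed to a separate
    # VulnerabilityReference model. After: newline-separated URLs are stored
    # directly on the row, which is why the search filter above can use
    # references__icontains.
    references = models.TextField(blank=True, null=True)

    # LLM-generated sections (illustrative field names; the real model may differ)
    description = models.TextField(blank=True, null=True)
    impact = models.TextField(blank=True, null=True)
    remediation = models.TextField(blank=True, null=True)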
58 changes: 38 additions & 20 deletions web/reNgine/llm/config.py
@@ -10,40 +10,51 @@
# LLM SYSTEM PROMPTS
###############################################################################

VULNERABILITY_DESCRIPTION_SYSTEM_MESSAGE = """
VULNERABILITY_CONTEXT = """
You are an expert penetration tester specializing in web application security assessments. Your task is to analyze the following vulnerability information:
- Vulnerability title
- Vulnerable URL
- Vulnerability description
Required report sections (separate each with \n\n):
Keep the tone technical and professional. Focus on actionable insights. Avoid generic statements.
"""

1. TECHNICAL DESCRIPTION
- Detailed technical explanation of the vulnerability
VULNERABILITY_TECHNICAL_DESCRIPTION_PROMPT = """
Provide a detailed technical description of the vulnerability, including:
- Detailed technical explanation
- Associated CVE IDs and CVSS scores if applicable
- Attack vectors and exploitation methods
- Any prerequisites or conditions required for exploitation
I don't want to see any other information in the response.
"""

2. BUSINESS IMPACT
VULNERABILITY_BUSINESS_IMPACT_PROMPT = """
Describe the business impact of this vulnerability, including:
- Direct security implications
- Potential business consequences
- Data exposure risks
- Compliance implications
I don't want to see any other information in the response.
"""

3. REMEDIATION STEPS
- Provide specific, actionable remediation steps
- Include code examples where relevant
- List configuration changes if needed
- Suggest security controls to prevent similar issues
VULNERABILITY_REMEDIATION_STEPS_PROMPT = """
List the remediation steps for this vulnerability, including:
- Specific, actionable steps
- Code examples where relevant
- Configuration changes if needed
- Security controls to prevent similar issues
Format: Each step prefixed with "- " on a new line
I don't want to see any other information in the response.
"""

4. REFERENCES
- Only include validated HTTP/HTTPS URLs
- Focus on official documentation, security advisories, and research papers
- Include relevant CVE details and exploit databases
VULNERABILITY_REFERENCES_PROMPT = """
Provide references related to this vulnerability, focusing on:
- Validated HTTP/HTTPS URLs
- Official documentation, security advisories, and research papers
- Relevant CVE details and exploit databases
Format: Each reference prefixed with "- " on a new line
Keep the tone technical and professional. Focus on actionable insights. Avoid generic statements.
I don't want to see any other information in the response.
Give me the response in json format.
"""

ATTACK_SUGGESTION_LLM_SYSTEM_PROMPT = """
@@ -91,11 +102,12 @@
LLM_CONFIG: Dict[str, Any] = {
'providers': {
'openai': {
'default_model': 'gpt-4-turbo-preview',
'default_model': 'gpt-4',
'models': [
'gpt-4-turbo-preview',
'gpt-4-turbo',
'gpt-4',
'gpt-3.5-turbo'
'gpt-3.5-turbo',
'gpt-3'
],
'api_version': '2024-02-15',
'max_tokens': 2000,
@@ -117,7 +129,13 @@
'timeout': 30,
'max_retries': 3,
'prompts': {
'vulnerability': VULNERABILITY_DESCRIPTION_SYSTEM_MESSAGE,
'vulnerability': {
'context': VULNERABILITY_CONTEXT,
'technical': VULNERABILITY_TECHNICAL_DESCRIPTION_PROMPT,
'impact': VULNERABILITY_BUSINESS_IMPACT_PROMPT,
'remediation': VULNERABILITY_REMEDIATION_STEPS_PROMPT,
'references': VULNERABILITY_REFERENCES_PROMPT,
},
'attack': ATTACK_SUGGESTION_LLM_SYSTEM_PROMPT
}
}
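
Since each report section now has its own prompt, a caller can loop over the nested mapping and issue one request per section. A rough sketch, assuming the prompts mapping sits at the top level of LLM_CONFIG as the hunk above suggests; the ask_llm callable is a stand-in for whichever client (OpenAI, Ollama, ...) the project wires in, not an actual reNgine helper.

from reNgine.llm.config import LLM_CONFIG

def generate_report_sections(ask_llm, vulnerability_summary):
    """Build one LLM request per report section and return them as a dict."""
    prompts = LLM_CONFIG['prompts']['vulnerability']
    context = prompts['context']  # shared system message for every section
    sections = {}
    for key in ('technical', 'impact', 'remediation', 'references'):
        sections[key] = ask_llm(context, prompts[key] + '\n\n' + vulnerability_summary)
    return sections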