refactor: update LLM vulnerability report generation and storage
- Replaced the get_vulnerability_llm_report function with llm_vulnerability_report for generating and storing vulnerability reports using an LLM.
- Enhanced the LLM vulnerability report generation process by splitting it into distinct sections: technical description, business impact, remediation steps, and references.
- Updated the data model to store references as text fields instead of using a separate VulnerabilityReference model.
- Improved the HTML rendering of vulnerability descriptions, impacts, and remediations by converting markdown to HTML with proper styling.
- Refactored the LLM response handling to use a dictionary format for easier manipulation and storage (see the sketch below).
- Removed redundant code and streamlined the process of updating vulnerabilities with LLM-generated data.
- Adjusted the configuration and prompts for the LLM to support more detailed and structured report generation.
psyray committed Nov 12, 2024
1 parent 916ed10 commit 7e87530
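
A minimal sketch of the dictionary-based flow described in the bullets above; the helper name, key names, and sample values are illustrative assumptions, not the exact schema used by llm_vulnerability_report.

# Hypothetical sketch of collecting the four generated sections for storage.
# Key names, the helper, and the sample values are illustrative only.

def build_report_dict(technical, impact, remediation, references):
    """Collect the four generated sections into one dictionary for storage."""
    return {
        'description': technical,      # technical description section
        'impact': impact,              # business impact section
        'remediation': remediation,    # "- " prefixed remediation steps
        # References are kept as plain text (one URL per line) now that a
        # separate VulnerabilityReference model is no longer used.
        'references': '\n'.join(references),
    }

report = build_report_dict(
    technical='Reflected XSS in the q parameter of /search ...',
    impact='An attacker can run arbitrary JavaScript in a victim browser ...',
    remediation='- Encode all user-controlled output\n- Deploy a strict CSP',
    references=['https://owasp.org/www-community/attacks/xss/'],
)
print(report['references'])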
Showing 13 changed files with 441 additions and 326 deletions.
43 changes: 6 additions & 37 deletions web/api/views.py
@@ -6,7 +6,6 @@
from ipaddress import IPv4Network
from collections import defaultdict
from datetime import datetime
from markdown import markdown

import requests
import validators
@@ -52,7 +51,7 @@
)
from reNgine.tasks import (
create_scan_activity,
llm_vulnerability_description,
llm_vulnerability_report,
initiate_subscan,
query_ip_history,
query_reverse_whois,
@@ -64,6 +63,7 @@
send_hackerone_report
)
from reNgine.llm.llm import LLMAttackSuggestionGenerator
from reNgine.llm.utils import convert_markdown_to_html
from reNgine.utilities import is_safe_path, remove_lead_and_trail_slash
from scanEngine.models import EngineType, InstalledExternalTool
from startScan.models import (
@@ -183,37 +183,6 @@ def put(self, request):
return Response({'status': False, 'message': 'An error occurred while updating Ollama settings.'}, status=500)

class LLMAttackSuggestion(APIView):
def _convert_markdown_to_html(self, markdown_text):
# Extract LLM badge if present (at the beginning of the text)
llm_badge = ""
if markdown_text.startswith('[LLM:'):
llm_name = markdown_text[5:markdown_text.index(']')]
llm_badge = f'<span class="badge bg-soft-primary text-primary mb-3">Generated by {llm_name}</span><br>'
markdown_text = markdown_text[markdown_text.index(']')+1:].strip()

# Configure Markdown with specific options
html_content = markdown(markdown_text,
extensions=[
'fenced_code',
'tables',
'nl2br',
'sane_lists', # Better list handling
'def_list', # Definition lists support
],
)

# Add Bootstrap classes and clean up formatting
html_content = (html_content
.replace('<pre><code>', '<pre class="bg-light p-3 rounded"><code class="text-danger">')
.replace('<ul>', '<ul class="list-unstyled">')
.replace('<ol>', '<ul class="list-unstyled">') # Convert ordered lists to unordered
.replace('</ol>', '</ul>')
.replace('\n\n', '<br>')
.replace('\n', '')
)

return llm_badge + html_content

def get(self, request):
req = request
subdomain_id = safe_int_cast(req.query_params.get('subdomain_id'))
@@ -237,7 +206,7 @@ def get(self, request):

# Return cached result only if not forcing regeneration
if subdomain.attack_surface and not force_regenerate:
sanitized_html = self._convert_markdown_to_html(subdomain.attack_surface)
sanitized_html = subdomain.formatted_attack_surface
return Response({
'status': True,
'subdomain_name': subdomain.name,
@@ -279,7 +248,7 @@ def get(self, request):
subdomain.attack_surface = markdown_content
subdomain.save()

response['description'] = self._convert_markdown_to_html(markdown_content)
response['description'] = convert_markdown_to_html(markdown_content)

return Response(response)

Check warning (Code scanning / CodeQL): Information exposure through an exception (Medium). Stack trace information flows to this location and may be exposed to an external user.
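
A generic pattern for this class of finding, shown here only as a hedged sketch (it is not part of this commit): log the full exception server-side and return a sanitized message to the client.

import logging

from rest_framework.response import Response

logger = logging.getLogger(__name__)

def safe_llm_response(generate):
    """Return generate() as an API response without leaking internals.

    generate is any callable producing the response payload; this wrapper is
    a generic sketch, not the handler used in reNgine.
    """
    try:
        return Response(generate())
    except Exception:
        # Keep the stack trace in server logs only; clients get a generic message.
        logger.exception('LLM attack suggestion generation failed')
        return Response(
            {'status': False, 'error': 'An internal error occurred.'},
            status=500,
        )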

@@ -320,7 +289,7 @@ def get(self, request):
'status': False,
'error': 'Missing GET param Vulnerability `id`'
})
task = llm_vulnerability_description.apply_async(args=(vulnerability_id,))
task = llm_vulnerability_report.apply_async(args=(vulnerability_id,))
response = task.wait()
return Response(response)

@@ -2792,7 +2761,7 @@ def general_lookup(self, search_value):
Q(severity__icontains=search_value) |
Q(description__icontains=search_value) |
Q(extracted_results__icontains=search_value) |
Q(references__url__icontains=search_value) |
Q(references__icontains=search_value) |
Q(cve_ids__name__icontains=search_value) |
Q(cwe_ids__name__icontains=search_value) |
Q(cvss_metrics__icontains=search_value) |
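
The references__icontains lookup above reflects the move from a related VulnerabilityReference model to a plain text field. A minimal sketch of what such a model change could look like; the field set is an assumption, not copied from the actual migration.

from django.db import models

class Vulnerability(models.Model):
    # Before this commit: references pointed to a separate
    # VulnerabilityReference model. After: newline-separated URLs are stored
    # directly on the row, which is why the search filter above can use
    # references__icontains.
    references = models.TextField(blank=True, null=True)

    # LLM-generated sections (illustrative field names; the real model may differ)
    description = models.TextField(blank=True, null=True)
    impact = models.TextField(blank=True, null=True)
    remediation = models.TextField(blank=True, null=True)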
58 changes: 38 additions & 20 deletions web/reNgine/llm/config.py
@@ -10,40 +10,51 @@
# LLM SYSTEM PROMPTS
###############################################################################

VULNERABILITY_DESCRIPTION_SYSTEM_MESSAGE = """
VULNERABILITY_CONTEXT = """
You are an expert penetration tester specializing in web application security assessments. Your task is to analyze the following vulnerability information:
- Vulnerability title
- Vulnerable URL
- Vulnerability description
Required report sections (separate each with \n\n):
Keep the tone technical and professional. Focus on actionable insights. Avoid generic statements.
"""

1. TECHNICAL DESCRIPTION
- Detailed technical explanation of the vulnerability
VULNERABILITY_TECHNICAL_DESCRIPTION_PROMPT = """
Provide a detailed technical description of the vulnerability, including:
- Detailed technical explanation
- Associated CVE IDs and CVSS scores if applicable
- Attack vectors and exploitation methods
- Any prerequisites or conditions required for exploitation
I don't want to see any other information in the response.
"""

2. BUSINESS IMPACT
VULNERABILITY_BUSINESS_IMPACT_PROMPT = """
Describe the business impact of this vulnerability, including:
- Direct security implications
- Potential business consequences
- Data exposure risks
- Compliance implications
I don't want to see any other information in the response.
"""

3. REMEDIATION STEPS
- Provide specific, actionable remediation steps
- Include code examples where relevant
- List configuration changes if needed
- Suggest security controls to prevent similar issues
VULNERABILITY_REMEDIATION_STEPS_PROMPT = """
List the remediation steps for this vulnerability, including:
- Specific, actionable steps
- Code examples where relevant
- Configuration changes if needed
- Security controls to prevent similar issues
Format: Each step prefixed with "- " on a new line
I don't want to see any other information in the response.
"""

4. REFERENCES
- Only include validated HTTP/HTTPS URLs
- Focus on official documentation, security advisories, and research papers
- Include relevant CVE details and exploit databases
VULNERABILITY_REFERENCES_PROMPT = """
Provide references related to this vulnerability, focusing on:
- Validated HTTP/HTTPS URLs
- Official documentation, security advisories, and research papers
- Relevant CVE details and exploit databases
Format: Each reference prefixed with "- " on a new line
Keep the tone technical and professional. Focus on actionable insights. Avoid generic statements.
I don't want to see any other information in the response.
Give me the response in json format.
"""

ATTACK_SUGGESTION_LLM_SYSTEM_PROMPT = """
@@ -91,11 +102,12 @@
LLM_CONFIG: Dict[str, Any] = {
'providers': {
'openai': {
'default_model': 'gpt-4-turbo-preview',
'default_model': 'gpt-4',
'models': [
'gpt-4-turbo-preview',
'gpt-4-turbo',
'gpt-4',
'gpt-3.5-turbo'
'gpt-3.5-turbo',
'gpt-3'
],
'api_version': '2024-02-15',
'max_tokens': 2000,
@@ -117,7 +129,13 @@
'timeout': 30,
'max_retries': 3,
'prompts': {
'vulnerability': VULNERABILITY_DESCRIPTION_SYSTEM_MESSAGE,
'vulnerability': {
'context': VULNERABILITY_CONTEXT,
'technical': VULNERABILITY_TECHNICAL_DESCRIPTION_PROMPT,
'impact': VULNERABILITY_BUSINESS_IMPACT_PROMPT,
'remediation': VULNERABILITY_REMEDIATION_STEPS_PROMPT,
'references': VULNERABILITY_REFERENCES_PROMPT,
},
'attack': ATTACK_SUGGESTION_LLM_SYSTEM_PROMPT
}
}
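
Since each report section now has its own prompt, a caller can loop over the nested mapping and issue one request per section. A rough sketch, assuming the prompts mapping sits at the top level of LLM_CONFIG as the hunk above suggests; the ask_llm callable is a stand-in for whichever client (OpenAI, Ollama, ...) the project wires in, not an actual reNgine helper.

from reNgine.llm.config import LLM_CONFIG

def generate_report_sections(ask_llm, vulnerability_summary):
    """Build one LLM request per report section and return them as a dict."""
    prompts = LLM_CONFIG['prompts']['vulnerability']
    context = prompts['context']  # shared system message for every section
    sections = {}
    for key in ('technical', 'impact', 'remediation', 'references'):
        sections[key] = ask_llm(context, prompts[key] + '\n\n' + vulnerability_summary)
    return sections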