refactor: improve tasks code structure and efficiency #280

Draft · wants to merge 17 commits into release/2.1.1
Changes from 9 commits
3 changes: 3 additions & 0 deletions .gitignore
@@ -43,6 +43,9 @@ MANIFEST
.vscode/settings.json
.vscode/tasks-and-contexts.json
.idea/
.cursorrules
.cursorignore
.cursor/

*.manifest
*.spec
2 changes: 1 addition & 1 deletion docker/celery/Dockerfile
@@ -148,7 +148,7 @@ RUN python3.10 -m pip install pipx && pipx ensurepath && printf "poetry\n\
https://github.com/laramies/theHarvester/archive/refs/tags/4.6.0.zip\n\
git+https://github.com/ncrocfer/whatportis@59a1718bf7c531f2a5a4e213cad0c047ce9c1c94\n\
git+https://github.com/EnableSecurity/wafw00f@914dbf4feab7e2529f064f4300b5fde84ea1cce3\n\
h8mail\n" | xargs -L1 pipx install || true
h8mail\n" | xargs -L1 pipx install || true && pipx install flower --include-deps && pipx inject flower redis

# Install tools
RUN ln -s /usr/local/bin/geckodriver $BINPATH/geckodriver && \
66 changes: 40 additions & 26 deletions docker/celery/entrypoint.sh
@@ -8,6 +8,9 @@ print_msg() {
}

RENGINE_FOLDER="/home/$USERNAME/rengine"
MAX_CONCURRENCY=${MAX_CONCURRENCY:-20}
MIN_CONCURRENCY=${MIN_CONCURRENCY:-5}
CELERY_LOGLEVEL=${CELERY_LOGLEVEL:-info}

print_msg "Generate Django migrations files"
poetry run -C $RENGINE_FOLDER python3 manage.py makemigrations
@@ -24,31 +24,27 @@ poetry run -C $RENGINE_FOLDER python3 manage.py loaddata fixtures/default_keywor
print_msg "Load default external tools"
poetry run -C $RENGINE_FOLDER python3 manage.py loaddata fixtures/external_tools.yaml --app scanEngine.InstalledExternalTool

if [ ! "$CELERY_LOGLEVEL" ]; then
    export CELERY_LOGLEVEL='info'
fi

print_msg "Start celery workers"
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --loglevel=$CELERY_LOGLEVEL --autoscale=$MAX_CONCURRENCY,$MIN_CONCURRENCY -Q main_scan_queue &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q initiate_scan_queue -n initiate_scan_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q subscan_queue -n subscan_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=20 --loglevel=$CELERY_LOGLEVEL -Q report_queue -n report_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q send_notif_queue -n send_notif_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q send_scan_notif_queue -n send_scan_notif_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q send_task_notif_queue -n send_task_notif_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=5 --loglevel=$CELERY_LOGLEVEL -Q send_file_to_discord_queue -n send_file_to_discord_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=5 --loglevel=$CELERY_LOGLEVEL -Q send_hackerone_report_queue -n send_hackerone_report_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q parse_nmap_results_queue -n parse_nmap_results_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=20 --loglevel=$CELERY_LOGLEVEL -Q geo_localize_queue -n geo_localize_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q query_whois_queue -n query_whois_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q remove_duplicate_endpoints_queue -n remove_duplicate_endpoints_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=50 --loglevel=$CELERY_LOGLEVEL -Q run_command_queue -n run_command_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q query_reverse_whois_queue -n query_reverse_whois_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q query_ip_history_queue -n query_ip_history_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q gpt_queue -n gpt_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q dorking_queue -n dorking_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q osint_discovery_queue -n osint_discovery_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q h8mail_queue -n h8mail_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="$RENGINE_FOLDER" -- poetry run -C $RENGINE_FOLDER celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q theHarvester_queue -n theHarvester_worker
worker_command() {
    local queue=$1
    local worker_name=$2

    if [ "$CELERY_DEBUG" = "1" ]; then
        echo "watchmedo auto-restart --recursive --pattern=\"*.py\" --directory=\"$RENGINE_FOLDER\" -- \
            poetry run -C $RENGINE_FOLDER celery -A reNgine worker \
            --pool=solo \
            --loglevel=$CELERY_LOGLEVEL \
            -Q $queue -n $worker_name"
    else
        echo "poetry run -C $RENGINE_FOLDER celery -A reNgine worker \
            --pool=gevent \
            --loglevel=$CELERY_LOGLEVEL \
            --autoscale=$MAX_CONCURRENCY,$MIN_CONCURRENCY \
            -Q $queue -n $worker_name"
    fi
}

queues=(
    "orchestrator_queue:orchestrator_worker"
    "io_queue:io_worker"
    "run_command_queue:run_command_worker"
    "cpu_queue:cpu_worker"
    "report_queue:report_worker"
    "send_notif_queue:send_notif_worker"
)

commands=""
for queue in "${queues[@]}"; do
    IFS=':' read -r queue worker_name <<< "$queue"
    commands+="$(worker_command "$queue" "$worker_name") &"$'\n'
done

eval "$commands"

wait

exec "$@"
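
The many per-task queues from the old entrypoint are consolidated into the six queues above; how individual tasks land on them is not part of this diff. A minimal routing sketch, assuming Celery's glob-style `task_routes` and the task modules imported elsewhere in this PR — the exact patterns are illustrative, not the project's actual configuration:

```python
# Hypothetical settings.py snippet (CELERY namespace) routing task modules
# onto the queues started by entrypoint.sh. The patterns are assumptions.
CELERY_TASK_ROUTES = {
    'reNgine.tasks.command.*': {'queue': 'run_command_queue'},
    'reNgine.tasks.dns.*': {'queue': 'io_queue'},
    'reNgine.tasks.llm.*': {'queue': 'cpu_queue'},
    'reNgine.tasks.notification.*': {'queue': 'send_notif_queue'},
    # Anything unmatched falls back to task_default_queue ('orchestrator_queue').
}
```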
4 changes: 4 additions & 0 deletions docker/docker-compose.dev.yml
@@ -13,10 +13,14 @@ services:
      - CELERY_DEBUG=1
      - CELERY_REMOTE_DEBUG=0
      - CELERY_REMOTE_DEBUG_PORT=5679
      - CELERY_TASK_ALWAYS_EAGER=0
      - CELERY_TASK_EAGER_PROPAGATES=0
      - COMMAND_EXECUTOR_DRY_RUN=1
    volumes:
      - ./celery/entrypoint-dev.sh:/entrypoint-dev.sh:ro
    ports:
      - "127.0.0.1:5679:5679"
      - "127.0.0.1:5555:5555"

  celery-beat:
    entrypoint: /entrypoint-dev.sh
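
The three new flags are plain `0`/`1` environment variables, and the exposed port 5555 matches Flower's default web UI (installed in the celery Dockerfile above). A sketch of how the flags could be read on the Django side, assuming the settings simply mirror the compose variable names (the helper below is illustrative):

```python
# Illustrative settings.py fragment; variable names mirror docker-compose.dev.yml.
import os

def env_bool(name, default='0'):
    """Treat '1', 'true' or 'yes' as enabled."""
    return os.environ.get(name, default).strip().lower() in ('1', 'true', 'yes')

CELERY_TASK_ALWAYS_EAGER = env_bool('CELERY_TASK_ALWAYS_EAGER')          # run tasks inline, no broker
CELERY_TASK_EAGER_PROPAGATES = env_bool('CELERY_TASK_EAGER_PROPAGATES')  # re-raise exceptions from eager tasks
COMMAND_EXECUTOR_DRY_RUN = env_bool('COMMAND_EXECUTOR_DRY_RUN')          # log external commands instead of running them
```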
6 changes: 5 additions & 1 deletion web/api/serializers.py
@@ -3,14 +3,15 @@
from django.contrib.humanize.templatetags.humanize import (naturalday, naturaltime)
from django.db.models import F, JSONField, Value
from recon_note.models import *
from reNgine.common_func import *
from rest_framework import serializers
from scanEngine.models import *
from startScan.models import *
from targetApp.models import *
from dashboard.models import *
import yaml

from reNgine.definitions import ENGINE_NAMES


class SearchHistorySerializer(serializers.ModelSerializer):
    class Meta:
@@ -359,6 +360,7 @@ def get_description(self, subdomain):
        return subdomain.name

    def get_title(self, subdomain):
        from reNgine.utils.db import get_interesting_subdomains
        if get_interesting_subdomains(subdomain.scan_history.id).filter(name=subdomain.name).exists():
            return "Interesting"

@@ -724,6 +726,7 @@ def get_change(self, Subdomain):
        return Subdomain.change

    def get_is_interesting(self, Subdomain):
        from reNgine.utils.db import get_interesting_subdomains
        return (
            get_interesting_subdomains(Subdomain.scan_history.id)
            .filter(name=Subdomain.name)
@@ -905,6 +908,7 @@ class Meta:
        fields = '__all__'

    def get_is_interesting(self, subdomain):
        from reNgine.utils.db import get_interesting_subdomains
        scan_id = subdomain.scan_history.id if subdomain.scan_history else None
        return (
            get_interesting_subdomains(scan_id)
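
`get_interesting_subdomains` is now imported inside each serializer method instead of at module level, which reads like a deferred import to break a load-time cycle between `api.serializers` and `reNgine.utils.db`. A generic sketch of the pattern — module and helper names here are hypothetical, chosen only to show the mechanics:

```python
# reports.py (hypothetical): helpers.py may import reports.py at module load,
# so reports.py postpones its own import of helpers until the function runs.

def get_title(subdomain):
    from helpers import get_interesting_subdomains  # resolved at call time, not import time
    if get_interesting_subdomains(subdomain.scan_history.id).filter(name=subdomain.name).exists():
        return "Interesting"
    return subdomain.name
```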
2 changes: 1 addition & 1 deletion web/api/tests/test_vulnerability.py
@@ -88,7 +88,7 @@ def setUp(self):
        self.data_generator.create_endpoint()
        self.data_generator.create_vulnerability()

@patch("reNgine.tasks.gpt_vulnerability_description.apply_async")
@patch("reNgine.tasks.llm_vulnerability_description.apply_async")
def test_get_vulnerability_report(self, mock_apply_async):
"""Test generating a vulnerability report."""
mock_task = MagicMock()
44 changes: 26 additions & 18 deletions web/api/views.py
@@ -25,11 +25,10 @@

from recon_note.models import TodoNote
from reNgine.celery import app
from reNgine.common_func import (
    get_data_from_post_request,
    get_interesting_endpoints,
    get_interesting_subdomains,
from reNgine.utils.db import (
    get_lookup_keywords,
)
from reNgine.utils.utils import (
    safe_int_cast
)
from reNgine.definitions import (
@@ -39,27 +38,31 @@
    DEFAULT_GPT_MODELS,
    RUNNING_TASK,
    SUCCESS_TASK,
    ENGINE_NAMES
)
from reNgine.settings import (
    RENGINE_CURRENT_VERSION,
    RENGINE_TOOL_GITHUB_PATH
)
from reNgine.tasks import (
    create_scan_activity,
    gpt_vulnerability_description,
from reNgine.tasks.scan import (
    initiate_subscan,
)
from reNgine.tasks.command import run_command_line
from reNgine.tasks.detect import (
    run_cmseek,
    run_wafw00f,
)
from reNgine.tasks.dns import (
    query_ip_history,
    query_reverse_whois,
    query_whois,
    run_cmseek,
    run_command,
)
from reNgine.tasks.llm import llm_vulnerability_description
from reNgine.tasks.notification import send_hackerone_report
from reNgine.tasks.url import (
    run_gf_list,
    run_wafw00f,
    send_hackerone_report
)
from reNgine.gpt import GPTAttackSuggestionGenerator
from reNgine.utilities import is_safe_path, remove_lead_and_trail_slash
from reNgine.utils.utils import is_safe_path, remove_lead_and_trail_slash
from scanEngine.models import EngineType, InstalledExternalTool
from startScan.models import (
    Command,
@@ -80,6 +83,13 @@
    Vulnerability,
)
from targetApp.models import Domain, Organization
from reNgine.utils.command_executor import run_command
from reNgine.utils.http import get_data_from_post_request
from reNgine.utils.db import (
    create_scan_activity,
    get_interesting_endpoints,
    get_interesting_subdomains
)

from .serializers import (
    CommandSerializer,
Expand All @@ -103,7 +113,6 @@
    OnlySubdomainNameSerializer,
    OrganizationSerializer,
    OrganizationTargetsSerializer,
    PortSerializer,
    ProjectSerializer,
    ReconNoteSerializer,
    ScanHistorySerializer,
@@ -240,7 +249,7 @@ def get(self, request):
                'status': False,
                'error': 'Missing GET param Vulnerability `id`'
            })
        task = gpt_vulnerability_description.apply_async(args=(vulnerability_id,))
        task = llm_vulnerability_description.apply_async(args=(vulnerability_id,))
        response = task.wait()
        return Response(response)

@@ -1015,8 +1024,8 @@ def get(self, request):
        else:
            return Response({'status': False, 'message': 'Cannot uninstall tool!'})

        run_command(uninstall_command)
        run_command.apply_async(args=(uninstall_command,))
        run_command_line(uninstall_command)
        run_command_line.apply_async(args=(uninstall_command,))

        tool.delete()

@@ -1047,7 +1056,6 @@ def get(self, request):
        update_command = 'cd ' + str(Path(RENGINE_TOOL_GITHUB_PATH) / tool_name) + ' && git pull && cd -'

        run_command(update_command)
        run_command.apply_async(args=(update_command,))
        return Response({'status': True, 'message': tool.name + ' updated successfully.'})


4 changes: 2 additions & 2 deletions web/dashboard/views.py
@@ -1,7 +1,7 @@
import json
import logging

from datetime import datetime, timedelta
from datetime import timedelta

from django.contrib import messages
from django.contrib.auth import get_user_model, update_session_auth_hash
@@ -20,7 +20,7 @@
from targetApp.models import Domain
from startScan.models import (
    EndPoint, ScanHistory, SubScan, Subdomain, Vulnerability, ScanActivity,
    IpAddress, Port, Technology, VulnerabilityTags, CountryISO
    IpAddress, Port, Technology, CountryISO
)
from dashboard.models import Project, OpenAiAPIKey, NetlasAPIKey
from dashboard.forms import ProjectForm
13 changes: 11 additions & 2 deletions web/reNgine/celery.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import
import os

import django
from celery import Celery
from celery.signals import setup_logging
@@ -10,10 +10,19 @@
# Celery app
app = Celery('reNgine')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()

# Default configuration for all tasks
app.conf.update(
    task_track_started=True,
    task_default_queue='orchestrator_queue',
    task_acks_late=True,
    worker_prefetch_multiplier=1,
)

app.autodiscover_tasks(['reNgine.tasks'])

@setup_logging.connect()
def config_loggers(*args, **kwargs):
    from logging.config import dictConfig
    dictConfig(app.conf['LOGGING'])
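
With `task_default_queue='orchestrator_queue'` and autodiscovery narrowed to the `reNgine.tasks` package, any discovered task runs on the orchestrator queue unless it is dispatched with an explicit queue. A minimal sketch — the task body and queue choice are illustrative, not taken from this PR:

```python
# Hypothetical module under reNgine/tasks/ picked up by
# app.autodiscover_tasks(['reNgine.tasks']).
from celery import shared_task

@shared_task(bind=True)
def ping(self, value):
    # task_acks_late + worker_prefetch_multiplier=1 mean the message is only
    # acknowledged after this body finishes, one message per worker slot.
    return value

ping.delay('hello')                                   # -> orchestrator_queue (default)
ping.apply_async(args=('hello',), queue='io_queue')   # -> one of the entrypoint queues
```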
