diff --git a/batch-migration.py b/batch-migration.py new file mode 100644 index 0000000000..5d99bc6c8e --- /dev/null +++ b/batch-migration.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +import json +import os +import subprocess +import sys + +def main(): + # 1. Use existing top 100 projects list + output_file = '/usr/local/google/home/matheushunsche/projects/top_100_projects.json' + if not os.path.exists(output_file): + print(f"Error: {output_file} not found. Please run count_testcases_async.py first or provide the file.") + sys.exit(1) + + with open(output_file, 'r') as f: + projects_data = json.load(f) + + projects = [p['project'] for p in projects_data] + # projects.reverse() # Run projects with fewer test cases first + total = len(projects) + print(f"Found {total} projects to process (running in descending order of test cases).") + + # 2. Run oss-migration.py for each project + for i, project in enumerate(projects, 1): + print(f"\n[{i}/{total}] Processing project: {project}") + print("=" * 40) + + # Check if project already has a successful result + summary_log = f'/usr/local/google/home/matheushunsche/projects/oss-migration/{project}/results/summary.log' + if os.path.exists(summary_log): + with open(summary_log, 'r') as f: + content = f.read() + if "✅ Success: Results meet criteria for PR." in content: + print(f"Skipping {project} as it already has a successful result.") + print("=" * 40) + continue + + cmd = [sys.executable, 'oss-migration.py', project, '--use-batch', '--gcs-bucket', 'clusterfuzz-external-casp-temp', '--rebuild'] + # Add other flags if needed, e.g., --use-batch, --gcs-bucket + # For now, keep it simple as requested (just build and test) + + try: + subprocess.run(cmd, check=False) # Don't check=True to allow continuing on failure + except Exception as e: + print(f"Error running migration for {project}: {e}") + + print(f"Finished processing {project}") + print("=" * 40) + +if __name__ == '__main__': + main() diff --git a/cli/casp/src/casp/commands/reproduce.py b/cli/casp/src/casp/commands/reproduce.py index 2fc86771e4..08a95ec3bb 100644 --- a/cli/casp/src/casp/commands/reproduce.py +++ b/cli/casp/src/casp/commands/reproduce.py @@ -7,7 +7,7 @@ # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is is "AS IS" BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. @@ -20,18 +20,25 @@ from casp.utils import docker_utils import click +from . import reproduce_project -@click.command( - name='reproduce', + +@click.group(name='reproduce', help='Reproduces a testcase locally') +def cli(): + """Reproduces a testcase locally""" + + +@cli.command( + name='testcase', help=('Reproduces a testcase locally. ' ' WARN: This essentially runs untrusted code ' 'in your local environment. 
' 'Please acknowledge the testcase (mainly input and build) ' 'before running this command.')) @click.option( - '--project', - '-p', - help='The ClusterFuzz project to use.', + '--environment', + '-e', + help='The ClusterFuzz environment to use.', required=True, type=click.Choice( docker_utils.PROJECT_TO_IMAGE.keys(), case_sensitive=False), @@ -46,11 +53,12 @@ ) @click.option( '--testcase-id', required=True, help='The ID of the testcase to reproduce.') -def cli(project: str, config_dir: str, testcase_id: str) -> None: +def reproduce_testcase(environment: str, config_dir: str, + testcase_id: str) -> None: """Reproduces a testcase locally by running a Docker container. Args: - project: The ClusterFuzz project name. + environment: The ClusterFuzz environment name. config_dir: The default configuration directory path within the container. testcase_id: The ID of the testcase to be reproduced. """ @@ -69,6 +77,9 @@ def cli(project: str, config_dir: str, testcase_id: str) -> None: command, volumes, privileged=True, - image=docker_utils.PROJECT_TO_IMAGE[project], + image=docker_utils.PROJECT_TO_IMAGE[environment], ): sys.exit(1) + + +cli.add_command(reproduce_project.cli) \ No newline at end of file diff --git a/cli/casp/src/casp/commands/reproduce_project.py b/cli/casp/src/casp/commands/reproduce_project.py new file mode 100644 index 0000000000..01a16ca1c8 --- /dev/null +++ b/cli/casp/src/casp/commands/reproduce_project.py @@ -0,0 +1,638 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
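+# Overview: this module is registered as the `project` subcommand of the
+# `reproduce` group. It fetches a project's open testcases from Datastore,
+# filters out unreliable ones (unreproducible, one-time, flaky, pending), and
+# reproduces the rest either locally in Docker (LocalReproductionStrategy) or
+# on Google Cloud Batch (BatchReproductionStrategy), writing one log file per
+# testcase.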
+"""Reproduces testcases for an OSS-Fuzz project locally.""" + +import concurrent.futures +from datetime import datetime +import os +import subprocess +import sys +import tempfile +import time +import random +import fcntl +from typing import Dict, List, Optional +import warnings + +from casp.utils import config +from casp.utils import container +from casp.utils import docker_utils +from casp.utils import batch_utils +import click + +# Imports do contexto +from clusterfuzz._internal.base import utils +from clusterfuzz._internal.config import local_config +from clusterfuzz._internal.datastore import data_types +from clusterfuzz._internal.datastore import ndb_init +from clusterfuzz._internal.datastore import ndb_utils + +# Suppress warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) +warnings.filterwarnings("ignore", category=FutureWarning) + + +def _get_build_directory(bucket_path, job_name, builds_dir): + """Calculates the build directory hash/path expected by build_manager.""" + if bucket_path: + if '://' in bucket_path: + path = bucket_path.split('://')[1].lstrip('/') + else: + path = bucket_path.lstrip('/') + + bucket_path_clean, file_pattern = path.rsplit('/', 1) + bucket_path_clean = bucket_path_clean.replace('/', '_') + + # Various build type mapping strings (from build_manager.py) + BUILD_TYPE_SUBSTRINGS = [ + '-beta', '-stable', '-debug', '-release', '-symbolized', + '-extended_stable' + ] + file_pattern = utils.remove_sub_strings(file_pattern, BUILD_TYPE_SUBSTRINGS) + file_pattern_hash = utils.string_hash(file_pattern) + job_directory = f'{bucket_path_clean}_{file_pattern_hash}' + else: + job_directory = job_name + + # RegularBuild uses 'revisions' subdirectory by default + return os.path.join(builds_dir, job_directory, 'revisions') + + +# --- REPRODUCTION STRATEGIES --- +class ReproductionStrategy: + """Base class for reproduction strategies.""" + def execute(self, tc_id: str, job, log_file_path: str, crash_revision: int) -> bool: + raise NotImplementedError + +class LocalReproductionStrategy(ReproductionStrategy): + def __init__(self, base_binds: Dict, container_config_dir: Optional[str], + local_build_dir: Optional[str], docker_image: str, + gcs_build_uri: Optional[str] = None): + self.base_binds = base_binds + self.container_config_dir = container_config_dir + self.local_build_dir = local_build_dir + self.docker_image = docker_image + self.gcs_build_uri = gcs_build_uri + + def execute(self, tc_id: str, job, log_file_path: str, crash_revision: int) -> bool: + with open(log_file_path, 'a', encoding='utf-8', errors='ignore') as log_f: + sys.stdout = log_f + sys.stderr = log_f + + def file_logger(line): + if line: + print(line) + sys.stdout.flush() + + try: + binds = self.base_binds.copy() + target_builds_root = '/data/clusterfuzz/bot/builds' + + # Parse environment to get RELEASE_BUILD_BUCKET_PATH + env = {} + for line in job.get_environment_string().splitlines(): + if '=' in line and not line.startswith('#'): + k, v = line.split('=', 1) + env[k.strip()] = v.strip() + release_build_bucket_path = env.get('RELEASE_BUILD_BUCKET_PATH') + + env_vars = { + 'ROOT_DIR': '/data/clusterfuzz', + 'CASP_STRUCTURED_LOGGING': '1', + 'PYTHONUNBUFFERED': '1', + 'PYTHONWARNINGS': 'ignore', + 'TEST_BOT_ENVIRONMENT': '1', + 'PYTHONPATH': '/data/clusterfuzz/src:/data/clusterfuzz/src/third_party', + 'BUILDS_DIR': target_builds_root, + } + + setup_commands = [] + if self.local_build_dir and release_build_bucket_path: + # Local Volume Flow + target_build_dir = 
_get_build_directory(release_build_bucket_path, + job.name, target_builds_root) + binds[self.local_build_dir] = {'bind': '/local_build', 'mode': 'rw'} + setup_commands.append(f"mkdir -p {target_build_dir}") + setup_commands.append(f"ln -s /local_build/* {target_build_dir}/") + setup_commands.append(f"echo {crash_revision} > {target_build_dir}/REVISION") + + elif self.gcs_build_uri and release_build_bucket_path: + # Simulate Batch GCS Volume + # 1. Create temp dir on host + host_temp_dir = tempfile.mkdtemp(prefix=f'casp-batch-sim-{tc_id}-') + # 2. Download GCS content to host temp dir + print(f"Downloading GCS content to host temp dir: {host_temp_dir}") + + if self.gcs_build_uri.endswith('.tar.gz'): + # Download tarball and extract + tarball_path = os.path.join(host_temp_dir, 'build.tar.gz') + subprocess.run(['gsutil', 'cp', self.gcs_build_uri, tarball_path], check=True) + subprocess.run(['tar', '-xzf', tarball_path, '-C', host_temp_dir], check=True) + os.remove(tarball_path) + # The upload created a directory structure, we might need to find where the actual build files are. + # Usually, they are in host_temp_dir/upload or similar if we uploaded a dir. + # Let's check if 'upload' dir exists and use it as mount source if so. + actual_build_dir = host_temp_dir + if os.path.isdir(os.path.join(host_temp_dir, 'upload')): + actual_build_dir = os.path.join(host_temp_dir, 'upload') + else: + # Direct directory copy (unlikely with current upload logic but good for fallback) + subprocess.run(['gsutil', '-m', 'cp', '-r', f'{self.gcs_build_uri}/*', host_temp_dir], check=True) + actual_build_dir = host_temp_dir + + # 3. Mount host temp dir to /mnt/shared/build in container + binds[actual_build_dir] = {'bind': '/mnt/shared/build', 'mode': 'rw'} + + target_build_dir = _get_build_directory(release_build_bucket_path, + job.name, target_builds_root) + setup_commands.append(f"mkdir -p {target_build_dir}") + setup_commands.append(f"ln -s /mnt/shared/build/* {target_build_dir}/") + setup_commands.append(f"echo {crash_revision} > {target_build_dir}/REVISION") + + if setup_commands: + cmd_str = f"{' && '.join(setup_commands)} && cd /data/clusterfuzz && python3.11 butler.py --local-logging reproduce --testcase-id={tc_id}" + else: + cmd_str = f"cd /data/clusterfuzz && python3.11 butler.py --local-logging reproduce --testcase-id={tc_id}" + + if self.container_config_dir: + cmd_str += f' --config-dir={self.container_config_dir}' + + cmd = ['sh', '-c', cmd_str] + + docker_utils.run_command( + cmd, binds, self.docker_image, privileged=True, + environment_vars=env_vars, log_callback=file_logger, silent=True) + + log_f.flush() + with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f_read: + log_content = f_read.read() + return "Crash is reproducible" in log_content or "The testcase reliably reproduces" in log_content + except Exception as e: + print(f"CRITICAL EXCEPTION in local worker for TC-{tc_id}: {e}") + return False + finally: + # Cleanup host temp dir if created + if 'host_temp_dir' in locals() and os.path.exists(host_temp_dir): + import shutil + shutil.rmtree(host_temp_dir) + +class BatchReproductionStrategy(ReproductionStrategy): + def __init__(self, docker_image: str, gcs_build_uri: Optional[str], project_id: str, os_version: str, container_config_dir: str = '/data/clusterfuzz/config', gcs_config_uri: Optional[str] = None): + self.docker_image = docker_image + self.gcs_build_uri = gcs_build_uri + self.project_id = project_id + self.os_version = os_version + self.container_config_dir = 
container_config_dir + self.gcs_config_uri = gcs_config_uri + + def execute(self, tc_id: str, job, log_file_path: str, crash_revision: int) -> bool: + # Include config_name and os_version in job_id to avoid collisions + config_name = job.name if job else 'unknown' + # Remove non-alphanumeric characters from config_name for safe job ID + safe_config_name = ''.join(c for c in config_name if c.isalnum() or c == '-').lower() + # Also safe os_version + safe_os_version = ''.join(c for c in self.os_version if c.isalnum() or c == '-').lower() + job_id = f"casp-repro-{tc_id}-{random.randint(1000, 9999)}-{safe_config_name}-{safe_os_version}-{int(datetime.now().timestamp())}" + # Job ID must match regex: ^[a-z]([-a-z0-9]*[a-z0-9])?$ and be max 63 chars + # Truncate if necessary, but keep timestamp and tc_id + if len(job_id) > 63: + # Prioritize tc_id and timestamp + job_id = f"casp-repro-{tc_id}-{random.randint(1000, 9999)}-{int(datetime.now().timestamp())}" + if len(job_id) > 63: + job_id = job_id[-63:] # Fallback to last 63 chars, though unlikely to be valid + target_builds_root = '/data/clusterfuzz/bot/builds' + + # Internal file logger for batch (simplified, logs go to GCS/Cloud Logging) + def file_logger(line): + pass # Logs are handled by Batch and collected later + + # Prepare GCS volumes + gcs_volumes = {} + + # Prepare environment variables + env_vars = { + 'ROOT_DIR': '/data/clusterfuzz', + 'CASP_STRUCTURED_LOGGING': '1', + 'PYTHONUNBUFFERED': '1', + 'PYTHONWARNINGS': 'ignore', + 'TEST_BOT_ENVIRONMENT': '1', + 'PYTHONPATH': '/data/clusterfuzz/src:/data/clusterfuzz/src/third_party', + 'BUILDS_DIR': target_builds_root, + 'GOOGLE_CLOUD_PROJECT': self.project_id, + 'CLOUDSDK_PYTHON': 'python3.11', + } + + # Construct command + setup_commands = [] + if self.gcs_build_uri: + # Parse environment to get RELEASE_BUILD_BUCKET_PATH + env = {} + for line in job.get_environment_string().splitlines(): + if '=' in line and not line.startswith('#'): + k, v = line.split('=', 1) + env[k.strip()] = v.strip() + release_build_bucket_path = env.get('RELEASE_BUILD_BUCKET_PATH') + + if release_build_bucket_path: + target_build_dir = _get_build_directory(release_build_bucket_path, + job.name, target_builds_root) + setup_commands.append(f"mkdir -p {target_build_dir}") + if self.gcs_build_uri.endswith('.tar.gz'): + setup_commands.append(f"gsutil cp {self.gcs_build_uri} /tmp/build.tar.gz") + # Extract and handle potential 'upload' directory from tarball + setup_commands.append(f"tar -xzf /tmp/build.tar.gz -C {target_build_dir}") + # If it extracted into an 'upload' subdir, move contents up + setup_commands.append(f"if [ -d {target_build_dir}/upload ]; then mv {target_build_dir}/upload/* {target_build_dir}/ && rmdir {target_build_dir}/upload; fi") + setup_commands.append(f"rm /tmp/build.tar.gz") + else: + setup_commands.append(f"gsutil -m cp -r {self.gcs_build_uri}/* {target_build_dir}/") + setup_commands.append(f"echo {crash_revision} > {target_build_dir}/REVISION") + + repro_cmd = f"cd /data/clusterfuzz && python3.11 butler.py --local-logging reproduce --testcase-id={tc_id}" + if self.gcs_config_uri: + repro_cmd += f" --config-dir={self.container_config_dir}" + else: + # Use default config location in the image + repro_cmd += " --config-dir=/data/clusterfuzz/src/appengine/config" + # Config is expected to be in the immutable image or not needed if default works + + if setup_commands: + full_cmd = ["/bin/sh", "-c", f"{' && '.join(setup_commands)} && {repro_cmd}"] + else: + full_cmd = ["/bin/sh", "-c", repro_cmd] + + 
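+    # Hand the assembled setup + reproduce command to Cloud Batch:
+    # create_batch_job_spec wires the immutable image, the shell command,
+    # environment variables and any GCS volumes into a job spec, and
+    # submit_and_monitor_job below submits it and polls until the job reaches
+    # SUCCEEDED or FAILED, scanning the collected logs for the success strings.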
job_spec = batch_utils.create_batch_job_spec( + job_id=job_id, + image=self.docker_image, + command=full_cmd, + gcs_volumes=gcs_volumes, + env_vars=env_vars, + privileged=True + ) + + click.echo(f"Log file: {log_file_path}", err=True) + + success_strings = ["Crash is reproducible", "The testcase reliably reproduces"] + # We rely on the main process to print the link, here we just run and monitor + success, logs = batch_utils.submit_and_monitor_job(job_id, job_spec, self.project_id, success_strings=success_strings, log_file_path=log_file_path) + + # Logs are already written in real-time in submit_and_monitor_job + return success + +def worker_reproduce(tc_id: str, strategy: ReproductionStrategy, log_file_path: str, crash_revision: int) -> bool: + """ + Runs the reproduction of a testcase using the provided strategy. + """ + try: + # Need to initialize Datastore context in worker to fetch Job + with ndb_init.context(): + testcase = data_types.Testcase.get_by_id(int(tc_id)) + # Download testcase + # This assumes a download_testcase function or method exists and returns the path. + # The lock is added to prevent concurrent gsutil downloads from multiple workers. + lock_file_path = os.path.join(tempfile.gettempdir(), f'casp_download_{testcase.project_name}.lock') + with open(lock_file_path, 'w') as f_lock: + try: + import fcntl # Import fcntl here if not already at the top + fcntl.flock(f_lock, fcntl.LOCK_EX) + # Assuming strategy has a method to download the testcase + # Or a global function is used. For now, this is a placeholder. + # testcase_path = strategy.download_testcase(tc_id) + # If download_testcase is a global function: + # testcase_path = download_testcase(tc_id) + # For this change, we'll just add the lock around where a download *would* happen. + pass # Placeholder for actual download logic + finally: + fcntl.flock(f_lock, fcntl.LOCK_UN) + + job = data_types.Job.query( + data_types.Job.name == testcase.job_type).get() + + return strategy.execute(tc_id, job, log_file_path, crash_revision) + except Exception as e: + print(f"CRITICAL EXCEPTION in worker for TC-{tc_id}: {e}") + return False + + +# --- MAIN CLI --- +@click.command('project') +@click.option('--project-name', required=True, help='OSS-Fuzz project name.') +@click.option( + '--config-dir', + '-c', + required=False, + default='../clusterfuzz-config', + help='Path to the root of the ClusterFuzz config checkout, e.g., ' + '../clusterfuzz-config.', +) +@click.option( + '-n', '--parallelism', default=10, type=int, help='Parallel workers.') +@click.option( + '--os-version', + type=click.Choice( + ['legacy', 'ubuntu-20-04', 'ubuntu-24-04'], case_sensitive=False), + default='legacy', + help='OS version to use for reproduction.') +@click.option( + '--environment', + '-e', + type=click.Choice(['external', 'internal', 'dev'], case_sensitive=False), + default='external', + help='The ClusterFuzz environment (instance type).') +@click.option( + '--local-build-path', + required=False, + help='Path to a local build directory with fuzzers compiled (e.g. /path/to/build/out). 
' + 'If provided, this build is used instead of downloading artifacts.') +@click.option('--engine', help='Fuzzing engine to filter by (e.g., libfuzzer, afl).') +@click.option('--sanitizer', help='Sanitizer to filter by (e.g., address, memory).') +@click.option('--use-batch', is_flag=True, help='Use Google Cloud Batch for reproduction.') +@click.option('--gcs-bucket', help='GCS bucket for temporary storage (required for --use-batch).') +@click.option('--limit', type=int, help='Limit the number of testcases to reproduce.') +@click.option('--log-dir', help='Directory to save logs.') +@click.option('--testcase-id', help='Specific testcase ID to reproduce.') +def cli(project_name, config_dir, parallelism, os_version, environment, + local_build_path, engine, sanitizer, use_batch, gcs_bucket, limit, log_dir, testcase_id): + """ + Reproduces testcases for an OSS-Fuzz project, saving logs to files. + """ + + # 1. Environment Setup + config_path = os.path.join(config_dir, 'configs', environment) + if not os.path.isdir(config_path): + click.secho( + f'Error: Config directory not found: {config_path}\n' + f'Please provide the correct path to the root of your ' + f'clusterfuzz-config checkout using the -c/--config-dir option.', + fg='red') + sys.exit(1) + + cfg = config.load_and_validate_config() + volumes, _ = docker_utils.prepare_docker_volumes(cfg, config_path) + + mount_point = '/custom_config' + volumes[os.path.abspath(config_path)] = {'bind': mount_point, 'mode': 'ro'} + worker_config_dir_arg = mount_point + + + abs_local_build_path = None + if local_build_path: + abs_local_build_path = os.path.abspath(local_build_path) + if not os.path.isdir(abs_local_build_path): + click.secho( + f'Error: Build directory not found: {abs_local_build_path}', fg='red') + sys.exit(1) + + # Attempt to set local environment for Datastore access + os.environ['CONFIG_DIR_OVERRIDE'] = os.path.abspath(config_path) + local_config.ProjectConfig().set_environment() + + # 2. Prepare Log Directory + timestamp = datetime.now().strftime('%Y%m%d-%H%M%S') + if not log_dir: + log_dir = '/usr/local/google/home/matheushunsche/projects/oss-fuzz-temp/casp/' + + if not os.path.exists(log_dir): + try: + os.makedirs(log_dir, exist_ok=True) + except Exception: + # Fallback to temp dir if we can't create the requested dir + log_dir = tempfile.mkdtemp(prefix=f'casp-{project_name}-{timestamp}-') + click.secho(f"Warning: Could not create log dir, using temp dir: {log_dir}", fg='yellow') + + log_dir = os.path.join(log_dir, f'casp-{project_name}-{timestamp}') + os.makedirs(log_dir, exist_ok=True) + click.echo(f"Logs will be saved in: {log_dir}") + + # 3. 
Fetch Testcases from Datastore + click.echo(f"Fetching testcases for {project_name}...") + try: + with ndb_init.context(): + query = data_types.Testcase.query( + data_types.Testcase.project_name == project_name, + ndb_utils.is_true(data_types.Testcase.open)) + testcases = list(ndb_utils.get_all_from_query(query)) + except Exception as e: + click.secho(f"Error fetching testcases: {e}", fg='red') + return + + if not testcases: + click.secho(f'No open testcases found for {project_name}.', fg='yellow') + return + + total_testcases_count = len(testcases) + + to_reproduce = [] + skipped = [] + filtered_out_count = 0 + + for t in testcases: + is_unreproducible = t.status and t.status.startswith('Unreproducible') + is_one_time = t.one_time_crasher_flag + is_timeout = t.crash_type == 'Timeout' + is_flaky_stack = t.flaky_stack + is_pending_status = t.status == 'Pending' + + # Filter by testcase ID if provided + if testcase_id and str(t.key.id()) != str(testcase_id): + continue + + # Filter by engine and sanitizer if provided + if engine and t.fuzzer_name and engine.lower() not in t.fuzzer_name.lower(): + filtered_out_count += 1 + continue + + if sanitizer and t.job_type: + sanitizer_map = { + 'address': 'asan', + 'memory': 'msan', + 'undefined': 'ubsan', + 'coverage': 'cov', + 'dataflow': 'dft' + } + mapped_sanitizer = sanitizer_map.get(sanitizer.lower(), sanitizer.lower()) + if mapped_sanitizer not in t.job_type.lower(): + filtered_out_count += 1 + continue + + if ( + is_unreproducible or is_one_time or is_timeout or is_flaky_stack or + is_pending_status): + skipped.append(t) + else: + to_reproduce.append(t) + + skipped_count = len(skipped) + if skipped_count > 0: + click.echo( + f"Found {total_testcases_count} open testcases. {skipped_count} skipped (Unreproducible, Flaky, Pending, etc)." + ) + else: + click.echo(f"Found {total_testcases_count} open testcases.") + + if filtered_out_count > 0: + msg = f"Filtered out {filtered_out_count} testcases not matching" + if engine: + msg += f" engine={engine}" + if sanitizer: + msg += f" sanitizer={sanitizer}" + click.echo(msg) + + if not to_reproduce: + click.echo("No reproducible testcases to run.") + return + + if limit and limit > 0: + click.echo(f"Limiting reproduction to first {limit} testcases (out of {len(to_reproduce)}).") + to_reproduce = to_reproduce[:limit] + + # 4. Get Docker Image Name (needed for strategy) + try: + docker_image = docker_utils.get_image_name(environment, os_version) + except ValueError as e: + click.secho(f'Error: {e}', fg='red') + return + + # 5. GCS Upload (Common for both if bucket provided) + gcs_build_uri = None + if gcs_bucket and abs_local_build_path: + try: + # Prepare build in a temporary directory for upload + with tempfile.TemporaryDirectory() as tmp_dir: + upload_dir = os.path.join(tmp_dir, 'upload') + os.makedirs(upload_dir) + + # Copy build contents + subprocess.run(['cp', '-r', f'{abs_local_build_path}/.', upload_dir], check=True) + + click.echo(f"Uploading build to GCS...") + gcs_build_uri = batch_utils.upload_to_gcs( + upload_dir, + gcs_bucket, + f"casp-builds/{project_name}/{int(datetime.now().timestamp())}" + ) + click.echo(f"Build uploaded to {gcs_build_uri}") + except Exception as e: + click.secho(f"Warning: GCS upload failed: {e}", fg='yellow') + # Fallback to local build only, if possible + + # 6. 
Initialize Strategy + strategy = None + if use_batch: + if not gcs_bucket: + click.secho('Error: --gcs-bucket is required when using --use-batch.', fg='red') + sys.exit(1) + + # Project ID is needed for Batch + try: + project_id = subprocess.check_output(["gcloud", "config", "get-value", "project"], text=True).strip() + except Exception: + project_id = os.environ.get('GOOGLE_CLOUD_PROJECT') + + if not project_id: + project_id = "clusterfuzz-external" # Fallback + + gcs_config_uri = None # TODO: Support config upload for Batch if needed + strategy = BatchReproductionStrategy( + docker_image=docker_image, + gcs_build_uri=gcs_build_uri, + project_id=project_id, + os_version=os_version, + container_config_dir=worker_config_dir_arg, + gcs_config_uri=gcs_config_uri + ) + click.echo(f"Using Cloud Batch strategy with project {project_id}") + + else: + # Local Execution + # Pass gcs_build_uri if available, so LocalReproductionStrategy can use it if it wants + strategy = LocalReproductionStrategy(volumes, worker_config_dir_arg, abs_local_build_path, docker_image, gcs_build_uri) + if gcs_build_uri: + click.echo("Using Local reproduction strategy with GCS download") + else: + click.echo("Using Local reproduction strategy with local volume") + + click.echo( + f"\nStarting reproduction for {len(to_reproduce)} testcases with {parallelism} parallel workers using {environment} environment and {os_version} OS." + ) + + # 7. Parallel Worker Execution + with concurrent.futures.ProcessPoolExecutor( + max_workers=parallelism) as executor: + future_to_tc = {} + + for t in to_reproduce: + tid = str(t.key.id()) + log_file = os.path.join(log_dir, f"tc-{tid}.log") + with open(log_file, 'w', encoding='utf-8') as f: + f.write(f"--- Starting reproduction for Testcase ID: {tid} ---\n") + f.write(f"Project: {t.project_name}\n") + f.write(f"Engine: {t.fuzzer_name}\n") + f.write(f"Job Type: {t.job_type}\n") + f.write("-" * 40 + "\n") + + if use_batch: + click.secho(f"➜ TC-{tid} Submitting to Cloud Batch...", fg='cyan') + + f = executor.submit(worker_reproduce, tid, strategy, log_file, t.crash_revision) + future_to_tc[f] = tid + + completed_count = 0 + success_count = 0 + failure_count = 0 + for future in concurrent.futures.as_completed(future_to_tc): + completed_count += 1 + tid = future_to_tc[future] + try: + is_success = future.result() + if is_success: + success_count += 1 + click.secho( + f"✔ TC-{tid} Success ({completed_count}/{len(to_reproduce)})", + fg='green') + else: + failure_count += 1 + click.secho( + f"✖ TC-{tid} Failed ({completed_count}/{len(to_reproduce)}) - Check log: {os.path.join(log_dir, f'tc-{tid}.log')}", + fg='red') + except Exception as exc: + failure_count += 1 + click.secho( + f"! 
TC-{tid} Error: {exc} ({completed_count}/{len(to_reproduce)}) - Check log: {os.path.join(log_dir, f'tc-{tid}.log')}", + fg='red') + + click.echo("\nAll reproduction tasks completed.") + + reproducible_count = len(to_reproduce) + success_rate = ( + success_count / reproducible_count) * 100 if reproducible_count else 0.0 + failure_rate = ( + failure_count / reproducible_count) * 100 if reproducible_count else 0.0 + + click.echo(f"Summary: {reproducible_count} testcases attempted.") + click.secho(f" ✔ Success: {success_count} ({success_rate:.2f}%)", fg='green') + click.secho(f" ✖ Failed: {failure_count} ({failure_rate:.2f}%)", fg='red') + click.secho( + f" ⚠ Skipped: {len(skipped)} - Unreliable (Unreproducible/One-time)", + fg='yellow') + click.echo(f"Detailed logs are available in: {log_dir}") + + +if __name__ == "__main__": + try: + cli() + except Exception as e: + import traceback + traceback.print_exc() + sys.exit(1) \ No newline at end of file diff --git a/cli/casp/src/casp/utils/batch_utils.py b/cli/casp/src/casp/utils/batch_utils.py new file mode 100644 index 0000000000..8321ceb3af --- /dev/null +++ b/cli/casp/src/casp/utils/batch_utils.py @@ -0,0 +1,309 @@ +import os +import sys +import subprocess +import json +import time +import tarfile +import tempfile +import random +import threading +import click +from typing import Dict, Optional, List, Tuple + +import fcntl + +gcloud_lock_file = '/tmp/gcloud_lock' + +def run_command_with_retry(cmd, max_retries=5, backoff_factor=2, check=True): + for i in range(max_retries): + # Use file-based lock for inter-process synchronization + with open(gcloud_lock_file, 'w') as f: + try: + fcntl.flock(f, fcntl.LOCK_EX) + result = subprocess.run(cmd, capture_output=True, text=True, check=check) + return result + except subprocess.CalledProcessError as e: + # Check for quota exceeded error (429) in stderr + is_quota_error = "429" in e.stderr or "RESOURCE_EXHAUSTED" in e.stderr or "RATE_LIMIT_EXCEEDED" in e.stderr + + if i == max_retries - 1: + click.secho(f"Command failed after {max_retries} retries: {' '.join(cmd)}", fg="red") + click.secho(f"Stderr: {e.stderr}", fg="red") + raise e + + wait_time = backoff_factor ** i + if is_quota_error: + # Longer wait for quota errors, with jitter + wait_time = (backoff_factor ** i) * 10 + random.uniform(1, 5) + click.secho(f"Quota exceeded, retrying in {wait_time:.2f}s...", fg="yellow") + + time.sleep(wait_time) + finally: + fcntl.flock(f, fcntl.LOCK_UN) + return None # Should not reach here if check=True + +def upload_to_gcs(local_path: str, bucket_name: str, gcs_path: str) -> str: + """Uploads a file or directory to GCS using gsutil.""" + destination = f"gs://{bucket_name}/{gcs_path}" + if os.path.isdir(local_path): + # Create a tarball for directories to preserve structure and speed up upload + # Optimized: use tar command directly and exclude .git + with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp: + tar_path = tmp.name + + click.echo(f"Creating tarball of {local_path} (excluding .git)...") + # Use tar command for better performance and exclude support + cmd = ["tar", "-czf", tar_path, "-C", os.path.dirname(local_path), "--exclude=.git", os.path.basename(local_path)] + subprocess.run(cmd, check=True) + + click.echo(f"Uploading tarball to {destination}.tar.gz...") + run_command_with_retry(["gsutil", "cp", tar_path, f"{destination}.tar.gz"], check=True) + os.remove(tar_path) + return f"{destination}.tar.gz" + else: + click.echo(f"Uploading {local_path} to {destination}...") + 
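+    # Single file: upload directly. run_command_with_retry serializes
+    # gcloud/gsutil invocations through a file lock and retries with
+    # exponential backoff, waiting longer on 429 / RESOURCE_EXHAUSTED
+    # quota errors.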
run_command_with_retry(["gsutil", "cp", local_path, destination], check=True) + return destination + +def create_batch_job_spec( + job_id: str, + image: str, + command: List[str], + gcs_volumes: Dict[str, str], # Mount point -> GCS URI + env_vars: Dict[str, str], + privileged: bool = False +) -> Dict: + """Creates a Cloud Batch job specification.""" + + # Prepare runnables. + runnables = [] + + # 1. Setup script runnable + setup_commands = ["mkdir -p /mnt/shared/credentials"] # Base shared dir + + for mount_point, gcs_uri in gcs_volumes.items(): + # mount_point is like /mnt/shared/build or /mnt/shared/config + setup_commands.append(f"mkdir -p {mount_point}") + if gcs_uri.endswith('.tar.gz'): + setup_commands.append(f"gsutil cp {gcs_uri} /tmp/vol.tar.gz") + # Extract to mount_point. Since we tarred with -C and basename, + # we might need --strip-components=1 if we want to avoid the extra directory layer, + # or keep it if we want it. For CASP, we usually want the contents directly in the mount point. + setup_commands.append(f"tar -xzf /tmp/vol.tar.gz -C {mount_point} --strip-components=1") + setup_commands.append("rm /tmp/vol.tar.gz") + else: + # For single files or directories (if gsutil supports it, but usually it's files here) + # Better to use gsutil cp -r for directories + setup_commands.append(f"gsutil cp -r {gcs_uri} {mount_point}/") + + # Add CASP specific setup if needed (like symlinks for builds) + # This might need to be passed in or kept generic. For now, keep it generic + # and handle specific setup in the command if possible, or add a setup_script arg. + + runnables.append({ + "script": { + "text": "\n".join(setup_commands) + } + }) + + # 2. Main container runnable + container_volumes = ["/mnt/shared:/mnt/shared"] + + container_options = "--privileged" # Always privileged for ClusterFuzz Docker-in-Docker + for vol in container_volumes: + container_options += f" -v {vol}" + + # Add GCS mount if needed, though we use gsutil usually + container_volumes_spec = [{"mountPath": "/mnt/shared", "remotePath": "mnt-shared"}] # Not used correctly, Batch is weird with GCS volumes + + runnable_container = { + "container": { + "imageUri": image, + "commands": command, + "options": container_options, + "volumes": ["/mnt/shared:/mnt/shared"] + }, + "environment": { + "variables": env_vars + } + } + runnables.append(runnable_container) + + # Get bucket name from first GCS volume for the GCS volume mount (even if not used by container directly, Batch needs it) + job_spec = { + "taskGroups": [{ + "taskSpec": { + "runnables": runnables, + "computeResource": { + "cpuMilli": "1000", # 1 vCPU + "memoryMib": "2000" # ~1.95 GB (2000 MiB) + }, + "maxRunDuration": "3600s", + "volumes": [] + } + }], + "logsPolicy": { + "destination": "CLOUD_LOGGING" + } + } + + if gcs_volumes: + bucket_name = gcs_volumes[list(gcs_volumes.keys())[0]].split('/')[2] + job_spec["taskGroups"][0]["taskSpec"]["volumes"].append({ + "gcs": { + "remotePath": bucket_name + }, + "mountPath": "/mnt/gcs" + }) + + return job_spec + +def submit_and_monitor_job(job_id: str, job_spec: Dict, project_id: str, location: str = "us-central1", success_strings: Optional[List[str]] = None, log_file_path: Optional[str] = None) -> Tuple[bool, str]: + """Submits a Batch job and monitors its progress. 
Returns (success, logs).""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp: + json.dump(job_spec, tmp) + tmp_path = tmp.name + + try: + console_url = f"https://pantheon.corp.google.com/batch/jobsDetail/regions/{location}/jobs/{job_id}/details?project={project_id}" + click.echo(f"Submitting Batch job {job_id}...", err=True) + click.echo(f"Console URL: {console_url}", err=True) + cmd = [ + "gcloud", "batch", "jobs", "submit", job_id, + f"--location={location}", + f"--config={tmp_path}", + f"--project={project_id}", + "--format=json" + ] + # Capture output to get the real job UID if needed, but job_id is usually enough for logs now + try: + run_command_with_retry(cmd, check=True) + except subprocess.CalledProcessError as e: + if "ALREADY_EXISTS" in e.stderr: + click.echo(f"Job {job_id} already exists, proceeding to monitor existing job.") + else: + raise e + + click.echo(f"Job {job_id} submitted. Monitoring...") + + job_uid = None + last_log_timestamp = None + all_output = [] + + log_file = None + if log_file_path: + log_file = open(log_file_path, 'a', encoding='utf-8', errors='ignore') + + # Initial jitter to avoid synchronized polling + time.sleep(random.uniform(1, 10)) + + while True: + cmd = [ + "gcloud", "batch", "jobs", "describe", job_id, + f"--location={location}", + f"--project={project_id}", + "--format=json" + ] + result = run_command_with_retry(cmd, check=True) + job_info = json.loads(result.stdout) + status = job_info.get("status", {}).get("state", "").strip() + if not job_uid: + job_uid = job_info.get("uid") + + # Removed real-time log polling to reduce GCS Logging quota usage. + # Logs will be collected once at the end of the job. + + status_msg = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Job status: {status}" + if log_file: + log_file.write(status_msg + "\n") + log_file.flush() + all_output.append(status_msg) + + if status in ["SUCCEEDED", "FAILED"]: + break + + # Polling interval for job status (less frequent than log polling) + time.sleep(random.uniform(30, 60)) + + # Final log collection to ensure we have everything, especially if container failed + if job_uid: + click.echo("Fetching final logs from Cloud Logging...") + final_log_filter = ( + f'logName=("projects/{project_id}/logs/batch_task_logs" OR ' + f'"projects/{project_id}/logs/batch_agent_logs") AND ' + f'labels.job_uid="{job_uid}"' + ) + + cmd = [ + "gcloud", "logging", "read", + final_log_filter, + f"--project={project_id}", + "--order=asc", + "--format=value(textPayload, jsonPayload.message)" + ] + + try: + # Use run_command_with_retry to handle transient network issues + # Retry multiple times for logs as they might be delayed in Cloud Logging + # Use exponential backoff with jitter to avoid hitting quota + result = None + max_log_retries = 5 + for i in range(max_log_retries): + result = run_command_with_retry(cmd, check=False) + if result and result.stdout and result.stdout.strip(): + break + + # If we are here, either result is None, or stdout is empty + # Wait before retrying, with exponential backoff and jitter + wait_time = (2 ** i) * 5 + random.uniform(1, 5) + click.echo(f"No logs found yet (attempt {i+1}/{max_log_retries}), retrying in {wait_time:.2f}s...") + time.sleep(wait_time) + + if result and result.stdout and result.stdout.strip(): + logs_text = result.stdout + if 'log_file' in locals() and log_file: + log_file.write("\n--- Final Logs (from Cloud Logging) ---\n") + log_file.write(logs_text) + log_file.write("\n") + log_file.flush() + all_output.append("\n--- Final 
Logs ---\n") + all_output.append(logs_text) + else: + msg = "\n--- No logs found in Cloud Logging for this job ---\n" + if 'log_file' in locals() and log_file: + log_file.write(msg) + log_file.flush() + all_output.append(msg) + except Exception as e: + click.secho(f"Failed to fetch final logs: {e}", fg="yellow") + + full_log_text = "\n".join(all_output) + + if status == "FAILED": + click.secho(f"Job failed with status: {status}", fg="red") + return False, full_log_text + + if status == "SUCCEEDED": + if success_strings: + found_success = False + for s in success_strings: + if s in full_log_text: + found_success = True + break + + if found_success: + click.secho("Job succeeded and verified via logs!", fg="green") + return True, full_log_text + else: + click.secho("Job succeeded but success strings not found in logs.", fg="yellow") + return False, full_log_text + else: + click.secho("Job succeeded!", fg="green") + return True, full_log_text + + return False, full_log_text + + finally: + if log_file: + log_file.close() + os.remove(tmp_path) diff --git a/cli/casp/src/casp/utils/docker_utils.py b/cli/casp/src/casp/utils/docker_utils.py index 7c4147d83c..4601eaa098 100644 --- a/cli/casp/src/casp/utils/docker_utils.py +++ b/cli/casp/src/casp/utils/docker_utils.py @@ -22,19 +22,32 @@ import docker -# TODO: Make this configurable. PROJECT_TO_IMAGE = { 'dev': ("gcr.io/clusterfuzz-images/chromium/base/immutable/dev:" - "20251008165901-utc-893e97e-640142509185-compute-d609115-prod"), + "20251119164957-utc-3612e16-640142509185-compute-486869c-prod"), 'internal': ( "gcr.io/clusterfuzz-images/chromium/base/immutable/internal:" - "20251110132749-utc-363160d-640142509185-compute-c7f2f8c-prod"), + "20251119164957-utc-3612e16-640142509185-compute-486869c-prod"), 'external': ("gcr.io/clusterfuzz-images/base/immutable/external:" - "20251111191918-utc-b5863ff-640142509185-compute-c5c296c-prod") + "20251119164957-utc-3612e16-640142509185-compute-486869c-prod") } + _DEFAULT_WORKING_DIR = '/data/clusterfuzz' +def get_image_name(project_type: str, os_version: str = 'legacy') -> str: + """Gets the Docker image name based on project type and OS version.""" + base_image = PROJECT_TO_IMAGE.get(project_type) + if not base_image: + raise ValueError(f'Unknown project type: {project_type}') + + if os_version == 'legacy': + return base_image + + repo, tag = base_image.split(':') + return f'{repo}:{os_version}-{tag}' + + def prepare_docker_volumes(cfg: dict[str, Any], default_config_dir: str) -> tuple[dict, Path]: """Prepares the Docker volume bindings.""" @@ -88,18 +101,20 @@ def check_docker_setup() -> docker.client.DockerClient | None: return None -def pull_image(image: str) -> bool: +def pull_image(image: str, silent: bool = False) -> bool: """Pulls the docker image.""" client = check_docker_setup() if not client: return False try: - click.echo(f'Pulling Docker image: {image}...') + if not silent: + click.echo(f'Pulling Docker image: {image}...') client.images.pull(image) return True except docker.errors.DockerException: - click.secho(f'Error: Docker image {image} not found.', fg='red') + if not silent: + click.secho(f'Error: Docker image {image} not found.', fg='red') return False @@ -108,6 +123,9 @@ def run_command( volumes: dict, image: str, privileged: bool = False, + environment_vars: dict = None, + log_callback=None, + silent: bool = False, ) -> bool: """Runs a command in a docker container and streams logs. @@ -116,6 +134,9 @@ def run_command( volumes: A dictionary of volumes to mount. 
image: The docker image to use. privileged: Whether to run the container as privileged. + environment_vars: A dictionary of environment variables. + log_callback: A function to handle log lines. + silent: Whether to suppress the initial "Running command" message. Returns: True on success, False otherwise. @@ -124,23 +145,29 @@ def run_command( if not client: return False - if not pull_image(image): + if not pull_image(image, silent=silent): return False container_instance = None try: - click.echo(f'Running command in Docker container: {command}') + if not silent: + click.echo(f'Running command in Docker container: {command}') container_instance = client.containers.run( image, command, volumes=volumes, working_dir=_DEFAULT_WORKING_DIR, privileged=privileged, + environment=environment_vars, detach=True, remove=False) # Can't auto-remove if we want to stream logs for line in container_instance.logs(stream=True, follow=True): - click.echo(line.decode('utf-8').strip()) + decoded_line = line.decode('utf-8').strip() + if log_callback: + log_callback(decoded_line) + else: + click.echo(decoded_line) result = container_instance.wait() if result['StatusCode'] != 0: diff --git a/count-testcases-async.py b/count-testcases-async.py new file mode 100644 index 0000000000..6c72a66092 --- /dev/null +++ b/count-testcases-async.py @@ -0,0 +1,65 @@ +import os +import sys +import json +import collections +from datetime import datetime + +# Add ClusterFuzz src to path +sys.path.insert(0, os.path.abspath('src')) +sys.path.insert(0, os.path.abspath('cli/casp/src')) + +from clusterfuzz._internal.config import local_config +from clusterfuzz._internal.datastore import data_types +from clusterfuzz._internal.datastore import ndb_init +from clusterfuzz._internal.datastore import ndb_utils + +def main(): + # Setup environment for Datastore access + os.environ['CONFIG_DIR_OVERRIDE'] = os.path.abspath('../clusterfuzz-config/configs/external') + local_config.ProjectConfig().set_environment() + + print("Fetching all open testcases from Datastore...") + with ndb_init.context(): + query = data_types.Testcase.query( + ndb_utils.is_true(data_types.Testcase.open)) + testcases = list(ndb_utils.get_all_from_query(query)) + + print(f"Found {len(testcases)} open testcases. 
Processing...") + + project_counts = collections.defaultdict(int) + for t in testcases: + # Apply same filtering logic as reproduce_project.py + is_unreproducible = t.status and t.status.startswith('Unreproducible') + is_one_time = t.one_time_crasher_flag + is_timeout = t.crash_type == 'Timeout' + is_flaky_stack = t.flaky_stack + is_pending_status = t.status == 'Pending' + + if not (is_unreproducible or is_one_time or is_timeout or is_flaky_stack or is_pending_status): + project_counts[t.project_name] += 1 + + # Sort by count descending + sorted_projects = sorted(project_counts.items(), key=lambda x: x[1], reverse=True) + top_100 = sorted_projects[:100] + + result = [] + for project, count in top_100: + result.append({ + 'project': project, + 'open_testcases': count + }) + + output_dir = '/usr/local/google/home/matheushunsche/projects/oss-fuzz-temp' + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, 'top_100_projects.json') + + with open(output_file, 'w') as f: + json.dump(result, f, indent=2) + + print(f"Saved top 100 projects to {output_file}") + print(f"Top 5 projects:") + for p in result[:5]: + print(f" - {p['project']}: {p['open_testcases']}") + +if __name__ == '__main__': + main() diff --git a/create-pr.py b/create-pr.py new file mode 100755 index 0000000000..f4a7ccb2fd --- /dev/null +++ b/create-pr.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +import argparse +import os +import shutil +import subprocess +import sys +import tempfile + +OSS_FUZZ_DIR = '/usr/local/google/home/matheushunsche/projects/oss-fuzz' + +def run_command(cmd, cwd=None): + print(f"Running: {cmd} in {cwd or os.getcwd()}") + subprocess.check_call(cmd, shell=True, cwd=cwd) + +def get_project_contacts(project, oss_fuzz_dir): + project_yaml = os.path.join(oss_fuzz_dir, 'projects', project, 'project.yaml') + contacts = [] # Start empty, or add default if needed. User's example had @OliverChang but it's better to pull from yaml + if os.path.exists(project_yaml): + with open(project_yaml, 'r') as f: + content = f.read() + import re + pc_match = re.search(r'primary_contact:\s+"?([^"\n]+)"?', content) + if pc_match: + contacts.append(pc_match.group(1)) + cc_match = re.search(r'auto_ccs:\n((?:\s+-\s+"?[^"\n]+"?\n)+)', content) + if cc_match: + for line in cc_match.group(1).splitlines(): + email = line.strip().replace('- ', '').replace('"', '') + if email and email not in contacts: + contacts.append(email) + return contacts + +def main(): + parser = argparse.ArgumentParser(description='Create PR for OSS-Fuzz migration') + parser.add_argument('project', help='OSS-Fuzz project name') + parser.add_argument('--recreate', action='store_true', help='Recreate branch and PR if they already exist') + args = parser.parse_args() + + project = args.project + branch_name = f"ubuntu-migration-{project}" + + print(f"--- Creating PR for {project} ---") + + # 0. Check if summary.log exists and indicates success + migration_results_dir = f'/usr/local/google/home/matheushunsche/projects/oss-migration/{project}/results' + summary_log = os.path.join(migration_results_dir, 'summary.log') + if not os.path.exists(summary_log): + print(f"Error: Summary log not found at {summary_log}") + print("Please run oss-migration.py first.") + sys.exit(1) + + with open(summary_log, 'r') as f: + content = f.read() + if "✅ Success: Results meet criteria for PR." 
not in content: + print(f"Error: Summary log indicates failure or criteria not met.") + print("Please check the summary log and resolve issues before creating PR.") + sys.exit(1) + print("Verified: Summary log indicates success.") + + # 1. Check out master and pull latest + run_command("git checkout master", cwd=OSS_FUZZ_DIR) + run_command("git pull origin master", cwd=OSS_FUZZ_DIR) + + # 2. Handle existing branch/PR if --recreate is set + if args.recreate: + print("Recreate flag set. Cleaning up existing branch and PR...") + # Close PR if exists + try: + existing_prs = subprocess.check_output(f"gh pr list --head {branch_name} --json number --jq '.[].number'", shell=True, cwd=OSS_FUZZ_DIR).decode('utf-8').strip() + if existing_prs: + for pr_num in existing_prs.split(): + print(f"Closing PR #{pr_num}...") + subprocess.run(f"gh pr close {pr_num} --delete-branch", shell=True, cwd=OSS_FUZZ_DIR) + except subprocess.CalledProcessError: + pass + + # Delete local branch + try: + subprocess.run(f"git branch -D {branch_name}", shell=True, cwd=OSS_FUZZ_DIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"Deleted local branch {branch_name}") + except subprocess.CalledProcessError: + pass + + # Delete remote branch + try: + subprocess.run(f"git push origin --delete {branch_name}", shell=True, cwd=OSS_FUZZ_DIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"Deleted remote branch {branch_name}") + except subprocess.CalledProcessError: + pass + else: + # Normal checks + # Check local + try: + subprocess.check_call(f"git rev-parse --verify {branch_name}", shell=True, cwd=OSS_FUZZ_DIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"Error: Branch {branch_name} already exists locally. Use --recreate to overwrite.") + sys.exit(1) + except subprocess.CalledProcessError: + pass # Branch does not exist locally, good + + try: + # Check remote + subprocess.check_call(f"git fetch origin {branch_name}", shell=True, cwd=OSS_FUZZ_DIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"Error: Branch {branch_name} already exists on remote. Use --recreate to overwrite.") + sys.exit(1) + except subprocess.CalledProcessError: + pass # Branch does not exist on remote, good + + # Create new branch + run_command(f"git checkout -b {branch_name}", cwd=OSS_FUZZ_DIR) + + # 3. 
Modify files + project_dir = os.path.join(OSS_FUZZ_DIR, 'projects', project) + project_yaml = os.path.join(project_dir, 'project.yaml') + dockerfile = os.path.join(project_dir, 'Dockerfile') + + if not os.path.exists(project_yaml) or not os.path.exists(dockerfile): + print(f"Error: Project files not found in {project_dir}") + sys.exit(1) + + # Get contacts before modifying project.yaml (though it shouldn't matter much) + contacts = get_project_contacts(project, OSS_FUZZ_DIR) + cc_list = ", ".join(contacts) + + # Modify project.yaml + with open(project_yaml, 'r') as f: + content = f.read() + if 'base_os_version: ubuntu-24-04' not in content: + # Insert at the beginning + new_content = 'base_os_version: ubuntu-24-04\n' + content + with open(project_yaml, 'w') as f: + f.write(new_content) + print(f"Updated {project_yaml} (added to beginning)") + else: + print(f"{project_yaml} already updated") + + # Modify Dockerfile + with open(dockerfile, 'r') as f: + content = f.read() + if 'ubuntu-24-04' not in content: + # Robust replacement using regex to handle base images like base-builder-go + # Matches 'FROM gcr.io/oss-fuzz-base/base-builder' optionally followed by '-lang' and optionally a tag + import re + new_content = re.sub( + r'FROM\s+(gcr\.io/oss-fuzz-base/base-builder(?:-[a-z0-9]+)?)(?::\w+)?', + r'FROM \1:ubuntu-24-04', + content + ) + with open(dockerfile, 'w') as f: + f.write(new_content) + print(f"Updated {dockerfile}") + else: + print(f"{dockerfile} already updated") + + # 4. Commit changes + run_command(f"git add projects/{project}/", cwd=OSS_FUZZ_DIR) + commit_msg = f"Migrate {project} to Ubuntu 24.04" + try: + run_command(f"git commit -m '{commit_msg}'", cwd=OSS_FUZZ_DIR) + except subprocess.CalledProcessError: + print("Nothing to commit (changes might already be committed)") + + # 5. Push and create PR + # Check if PR already exists before pushing + pr_title = f"Migrate {project} to Ubuntu 24.04" + pr_body = f"""### Summary + +This pull request migrates the `{project}` project to use the new `ubuntu-24-04` base image for fuzzing. + +### Changes in this PR + +1. **`projects/{project}/project.yaml`**: Sets the `base_os_version` property to `ubuntu-24-04`. +2. **`projects/{project}/Dockerfile`**: Updates the `FROM` instruction. 
+ +CC: {cc_list} +""" + + if not args.recreate: + # Check for existing PR with same title or branch + try: + existing_prs = subprocess.check_output(f"gh pr list --head {branch_name} --json number --jq '.[].number'", shell=True, cwd=OSS_FUZZ_DIR).decode('utf-8').strip() + if existing_prs: + print(f"Error: PR already exists for branch {branch_name} (PR #{existing_prs})") + sys.exit(1) + except subprocess.CalledProcessError: + pass # No existing PR or error checking, proceed + + run_command(f"git push origin {branch_name}", cwd=OSS_FUZZ_DIR) + + # Create PR with body from file to handle multiline safely + with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as tmp: + tmp.write(pr_body) + tmp_path = tmp.name + + try: + # Default reviewers that we know are valid + default_reviewers = ['DavidKorczynski', 'decoNR', 'ViniciustCosta', 'jonathanmetzman'] + reviewer_args = [f'--reviewer "{c}"' for c in default_reviewers] + reviewer_cmd = " ".join(reviewer_args) + + try: + print(f"Attempting to create PR with default reviewers: {', '.join(default_reviewers)}") + output = subprocess.check_output(f"gh pr create --title '{pr_title}' --body-file '{tmp_path}' {reviewer_cmd}", shell=True, cwd=OSS_FUZZ_DIR).decode('utf-8').strip() + except subprocess.CalledProcessError: + print("Warning: Failed to create PR with default reviewers. Retrying without reviewers...") + output = subprocess.check_output(f"gh pr create --title '{pr_title}' --body-file '{tmp_path}'", shell=True, cwd=OSS_FUZZ_DIR).decode('utf-8').strip() + + print(f"PR created: {output}") + + # Try to find PR URL in output + pr_url = None + for line in output.splitlines(): + if 'github.com' in line and '/pull/' in line: + pr_url = line.strip() + break + if not pr_url: + pr_url = output.splitlines()[-1] # Fallback + + # Project contacts are already in CC list in description, no need to add as reviewers + print(f"Project contacts added to CC list in description.") + finally: + os.remove(tmp_path) + +if __name__ == "__main__": + main() diff --git a/helper_modified.py b/helper_modified.py new file mode 100644 index 0000000000..6e7d992a26 --- /dev/null +++ b/helper_modified.py @@ -0,0 +1,1899 @@ +#!/usr/bin/env python +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +"""Helper script for OSS-Fuzz users. 
Can do common tasks like building +projects/fuzzers, running them etc.""" + +from __future__ import print_function +from multiprocessing.dummy import Pool as ThreadPool +import argparse +import datetime +import errno +import logging +import os +import re +import shlex +import shutil +import subprocess +import sys +import tempfile +import urllib.request + +import constants +import templates + +OSS_FUZZ_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +BUILD_DIR = os.path.join(OSS_FUZZ_DIR, 'build') + +BASE_RUNNER_IMAGE = 'gcr.io/oss-fuzz-base/base-runner' + + +def _get_base_runner_image(args, debug=False): + """Returns the base runner image to use.""" + image = BASE_RUNNER_IMAGE + if debug: + image += '-debug' + + tag = 'latest' + if hasattr(args, 'base_image_tag') and args.base_image_tag: + tag = args.base_image_tag + elif hasattr(args, 'project') and args.project: + if args.project.base_os_version != 'legacy': + tag = args.project.base_os_version + + return f'{image}:{tag}' + + +BASE_IMAGES = { + 'generic': [ + 'gcr.io/oss-fuzz-base/base-image', + 'gcr.io/oss-fuzz-base/base-clang', + 'gcr.io/oss-fuzz-base/base-builder', + BASE_RUNNER_IMAGE, + 'gcr.io/oss-fuzz-base/base-runner-debug', + ], + 'go': ['gcr.io/oss-fuzz-base/base-builder-go'], + 'javascript': ['gcr.io/oss-fuzz-base/base-builder-javascript'], + 'jvm': ['gcr.io/oss-fuzz-base/base-builder-jvm'], + 'python': ['gcr.io/oss-fuzz-base/base-builder-python'], + 'rust': ['gcr.io/oss-fuzz-base/base-builder-rust'], + 'ruby': ['gcr.io/oss-fuzz-base/base-builder-ruby'], + 'swift': ['gcr.io/oss-fuzz-base/base-builder-swift'], +} + +VALID_PROJECT_NAME_REGEX = re.compile(r'^[a-zA-Z0-9_-]+$') +MAX_PROJECT_NAME_LENGTH = 26 + +CORPUS_URL_FORMAT = ( + 'gs://{project_name}-corpus.clusterfuzz-external.appspot.com/libFuzzer/' + '{fuzz_target}/') +CORPUS_BACKUP_URL_FORMAT = ( + 'gs://{project_name}-backup.clusterfuzz-external.appspot.com/corpus/' + 'libFuzzer/{fuzz_target}/') + +HTTPS_CORPUS_BACKUP_URL_FORMAT = ( + 'https://storage.googleapis.com/{project_name}-backup.clusterfuzz-external' + '.appspot.com/corpus/libFuzzer/{fuzz_target}/public.zip') + +LANGUAGE_REGEX = re.compile(r'[^\s]+') +PROJECT_LANGUAGE_REGEX = re.compile(r'\s*language\s*:\s*([^\s]+)') +BASE_OS_VERSION_REGEX = re.compile(r'\s*base_os_version\s*:\s*([^\s]+)') + +WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)') + +# Regex to match special chars in project name. 
+SPECIAL_CHARS_REGEX = re.compile('[^a-zA-Z0-9_-]') + +LANGUAGE_TO_BASE_BUILDER_IMAGE = { + 'c': 'base-builder', + 'c++': 'base-builder', + 'go': 'base-builder-go', + 'javascript': 'base-builder-javascript', + 'jvm': 'base-builder-jvm', + 'python': 'base-builder-python', + 'ruby': 'base-builder-ruby', + 'rust': 'base-builder-rust', + 'swift': 'base-builder-swift' +} +ARM_BUILDER_NAME = 'oss-fuzz-buildx-builder' + +CLUSTERFUZZLITE_ENGINE = 'libfuzzer' +CLUSTERFUZZLITE_ARCHITECTURE = 'x86_64' +CLUSTERFUZZLITE_FILESTORE_DIR = 'filestore' +CLUSTERFUZZLITE_DOCKER_IMAGE = 'gcr.io/oss-fuzz-base/cifuzz-run-fuzzers' + +INDEXER_PREBUILT_URL = ('https://clusterfuzz-builds.storage.googleapis.com/' + 'oss-fuzz-artifacts/indexer') + +logger = logging.getLogger(__name__) + +if sys.version_info[0] >= 3: + raw_input = input # pylint: disable=invalid-name + +# pylint: disable=too-many-lines + + +class Project: + """Class representing a project that is in OSS-Fuzz or an external project + (ClusterFuzzLite user).""" + + def __init__( + self, + project_name_or_path, + is_external=False, + build_integration_path=constants.DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH): + self.is_external = is_external + if self.is_external: + self.path = os.path.abspath(project_name_or_path) + self.name = os.path.basename(self.path) + self.build_integration_path = os.path.join(self.path, + build_integration_path) + else: + self.name = project_name_or_path + self.path = os.path.join(OSS_FUZZ_DIR, 'projects', self.name) + self.build_integration_path = self.path + + @property + def dockerfile_path(self): + """Returns path to the project Dockerfile.""" + return os.path.join(self.build_integration_path, 'Dockerfile') + + @property + def language(self): + """Returns project language.""" + project_yaml_path = os.path.join(self.build_integration_path, + 'project.yaml') + if not os.path.exists(project_yaml_path): + logger.warning('No project.yaml. Assuming c++.') + return constants.DEFAULT_LANGUAGE + + with open(project_yaml_path) as file_handle: + content = file_handle.read() + for line in content.splitlines(): + match = PROJECT_LANGUAGE_REGEX.match(line) + if match: + return match.group(1) + + logger.warning('Language not specified in project.yaml. Assuming c++.') + return constants.DEFAULT_LANGUAGE + + @property + def base_os_version(self): + """Returns the project's base OS version.""" + project_yaml_path = os.path.join(self.build_integration_path, + 'project.yaml') + if not os.path.exists(project_yaml_path): + return 'legacy' + + with open(project_yaml_path) as file_handle: + content = file_handle.read() + for line in content.splitlines(): + match = BASE_OS_VERSION_REGEX.match(line) + if match: + return match.group(1) + + return 'legacy' + + @property + def coverage_extra_args(self): + """Returns project coverage extra args.""" + project_yaml_path = os.path.join(self.build_integration_path, + 'project.yaml') + if not os.path.exists(project_yaml_path): + logger.warning('project.yaml not found: %s.', project_yaml_path) + return '' + + with open(project_yaml_path) as file_handle: + content = file_handle.read() + + coverage_flags = '' + read_coverage_extra_args = False + # Pass the yaml file and extract the value of the coverage_extra_args key. + # This is naive yaml parsing and we do not handle comments at this point. + for line in content.splitlines(): + if read_coverage_extra_args: + # Break reading coverage args if a new yaml key is defined. 
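+        # (Here a "new key" is any non-empty line that does not start with a
+        # space, i.e. the next top-level entry in project.yaml.)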
+ if len(line) > 0 and line[0] != ' ': + break + coverage_flags += line + if 'coverage_extra_args' in line: + read_coverage_extra_args = True + # Include the first line only if it's not a multi-line value. + if 'coverage_extra_args: >' not in line: + coverage_flags += line.replace('coverage_extra_args: ', '') + return coverage_flags + + @property + def out(self): + """Returns the out dir for the project. Creates it if needed.""" + return _get_out_dir(self.name) + + @property + def work(self): + """Returns the out dir for the project. Creates it if needed.""" + return _get_project_build_subdir(self.name, 'work') + + @property + def corpus(self): + """Returns the out dir for the project. Creates it if needed.""" + return _get_project_build_subdir(self.name, 'corpus') + + +def main(): # pylint: disable=too-many-branches,too-many-return-statements + """Gets subcommand from program arguments and does it. Returns 0 on success 1 + on error.""" + logging.basicConfig(level=logging.INFO) + parser = get_parser() + args = parse_args(parser) + + # Need to do this before chdir. + # TODO(https://github.com/google/oss-fuzz/issues/6758): Get rid of chdir. + if hasattr(args, 'testcase_path'): + args.testcase_path = _get_absolute_path(args.testcase_path) + # Note: this has to happen after parse_args above as parse_args needs to know + # the original CWD for external projects. + os.chdir(OSS_FUZZ_DIR) + if not os.path.exists(BUILD_DIR): + os.mkdir(BUILD_DIR) + + # We have different default values for `sanitizer` depending on the `engine`. + # Some commands do not have `sanitizer` argument, so `hasattr` is necessary. + if hasattr(args, 'sanitizer') and not args.sanitizer: + if args.project.language == 'javascript': + args.sanitizer = 'none' + else: + args.sanitizer = constants.DEFAULT_SANITIZER + + if args.command == 'generate': + result = generate(args) + elif args.command == 'build_image': + result = build_image(args) + elif args.command == 'build_fuzzers': + result = build_fuzzers(args) + elif args.command == 'fuzzbench_build_fuzzers': + result = fuzzbench_build_fuzzers(args) + elif args.command == 'fuzzbench_run_fuzzer': + result = fuzzbench_run_fuzzer(args) + elif args.command == 'fuzzbench_measure': + result = fuzzbench_measure(args) + elif args.command == 'check_build': + result = check_build(args) + elif args.command == 'download_corpora': + result = download_corpora(args) + elif args.command == 'run_fuzzer': + result = run_fuzzer(args) + elif args.command == 'coverage': + result = coverage(args) + elif args.command == 'introspector': + result = introspector(args) + elif args.command == 'reproduce': + result = reproduce(args) + elif args.command == 'shell': + result = shell(args) + elif args.command == 'pull_images': + result = pull_images() + elif args.command == 'index': + result = index(args) + elif args.command == 'run_clusterfuzzlite': + result = run_clusterfuzzlite(args) + else: + # Print help string if no arguments provided. + parser.print_help() + result = False + return bool_to_retcode(result) + + +def bool_to_retcode(boolean): + """Returns 0 if |boolean| is Truthy, 0 is the standard return code for a + successful process execution. Returns 1 otherwise, indicating the process + failed.""" + return 0 if boolean else 1 + + +def parse_args(parser, args=None): + """Parses |args| using |parser| and returns parsed args. 
Also changes + |args.build_integration_path| to have correct default behavior.""" + # Use default argument None for args so that in production, argparse does its + # normal behavior, but unittesting is easier. + parsed_args = parser.parse_args(args) + project = getattr(parsed_args, 'project', None) + if not project: + return parsed_args + + # Use hacky method for extracting attributes so that ShellTest works. + # TODO(metzman): Fix this. + is_external = getattr(parsed_args, 'external', False) + parsed_args.project = Project(parsed_args.project, is_external) + return parsed_args + + +def _add_external_project_args(parser): + parser.add_argument( + '--external', + help='Is project external?', + default=False, + action='store_true', + ) + + +def get_parser(): # pylint: disable=too-many-statements,too-many-locals + """Returns an argparse parser.""" + parser = argparse.ArgumentParser('helper.py', description='oss-fuzz helpers') + subparsers = parser.add_subparsers(dest='command') + + generate_parser = subparsers.add_parser( + 'generate', help='Generate files for new project.') + generate_parser.add_argument('project') + generate_parser.add_argument('--language', + default=constants.DEFAULT_LANGUAGE, + choices=LANGUAGE_TO_BASE_BUILDER_IMAGE.keys(), + help='Project language.') + _add_external_project_args(generate_parser) + + build_image_parser = subparsers.add_parser('build_image', + help='Build an image.') + build_image_parser.add_argument('project') + build_image_parser.add_argument('--pull', + action='store_true', + help='Pull latest base image.') + _add_architecture_args(build_image_parser) + build_image_parser.add_argument('--cache', + action='store_true', + default=False, + help='Use docker cache when building image.') + build_image_parser.add_argument('--no-pull', + action='store_true', + help='Do not pull latest base image.') + _add_external_project_args(build_image_parser) + + build_fuzzers_parser = subparsers.add_parser( + 'build_fuzzers', help='Build fuzzers for a project.') + _add_architecture_args(build_fuzzers_parser) + _add_engine_args(build_fuzzers_parser) + _add_sanitizer_args(build_fuzzers_parser) + _add_environment_args(build_fuzzers_parser) + _add_external_project_args(build_fuzzers_parser) + build_fuzzers_parser.add_argument('--docker-arg', + help='Additional docker argument to pass through ', + nargs='*', + action='extend') + build_fuzzers_parser.add_argument('project') + build_fuzzers_parser.add_argument('source_path', + help='path of local source', + nargs='?') + build_fuzzers_parser.add_argument('--mount_path', + dest='mount_path', + help='path to mount local source in ' + '(defaults to WORKDIR)') + build_fuzzers_parser.add_argument('--clean', + dest='clean', + action='store_true', + help='clean existing artifacts.') + build_fuzzers_parser.add_argument('--no-clean', + dest='clean', + action='store_false', + help='do not clean existing artifacts ' + '(default).') + build_fuzzers_parser.set_defaults(clean=False) + + fuzzbench_build_fuzzers_parser = subparsers.add_parser( + 'fuzzbench_build_fuzzers') + _add_architecture_args(fuzzbench_build_fuzzers_parser) + fuzzbench_build_fuzzers_parser.add_argument('--engine') + _add_sanitizer_args(fuzzbench_build_fuzzers_parser) + _add_environment_args(fuzzbench_build_fuzzers_parser) + _add_external_project_args(fuzzbench_build_fuzzers_parser) + fuzzbench_build_fuzzers_parser.add_argument('project') + check_build_parser = subparsers.add_parser( + 'check_build', help='Checks that fuzzers execute without errors.') + 
_add_architecture_args(check_build_parser) + _add_engine_args(check_build_parser, choices=constants.ENGINES) + _add_sanitizer_args(check_build_parser, choices=constants.SANITIZERS) + _add_environment_args(check_build_parser) + _add_base_image_tag_args(check_build_parser) + check_build_parser.add_argument('project', + help='name of the project or path (external)') + check_build_parser.add_argument('fuzzer_name', + help='name of the fuzzer', + nargs='?') + _add_external_project_args(check_build_parser) + index_parser = subparsers.add_parser('index', help='Index project.') + index_parser.add_argument( + '--targets', help='Allowlist of targets to index (comma-separated).') + index_parser.add_argument('--dev', + action='store_true', + help=('Use development versions of scripts and ' + 'indexer.')) + index_parser.add_argument('--shell', + action='store_true', + help='Run /bin/bash instead of the indexer.') + index_parser.add_argument('--docker_arg', + help='Additional docker argument to pass through ' + '(can be specified multiple times).', + nargs='*', + action='extend') + index_parser.add_argument('project', help='Project') + index_parser.add_argument( + 'extra_args', + nargs='*', + help='Additional args to pass through to the Docker entrypoint.') + _add_architecture_args(index_parser) + _add_environment_args(index_parser) + + run_fuzzer_parser = subparsers.add_parser( + 'run_fuzzer', help='Run a fuzzer in the emulated fuzzing environment.') + _add_architecture_args(run_fuzzer_parser) + _add_engine_args(run_fuzzer_parser) + _add_sanitizer_args(run_fuzzer_parser) + _add_environment_args(run_fuzzer_parser) + _add_base_image_tag_args(run_fuzzer_parser) + _add_external_project_args(run_fuzzer_parser) + run_fuzzer_parser.add_argument( + '--corpus-dir', help='directory to store corpus for the fuzz target') + run_fuzzer_parser.add_argument('project', + help='name of the project or path (external)') + run_fuzzer_parser.add_argument('fuzzer_name', help='name of the fuzzer') + run_fuzzer_parser.add_argument('fuzzer_args', + help='arguments to pass to the fuzzer', + nargs='*') + + fuzzbench_run_fuzzer_parser = subparsers.add_parser('fuzzbench_run_fuzzer') + _add_architecture_args(fuzzbench_run_fuzzer_parser) + fuzzbench_run_fuzzer_parser.add_argument('--engine') + _add_sanitizer_args(fuzzbench_run_fuzzer_parser) + _add_environment_args(fuzzbench_run_fuzzer_parser) + _add_external_project_args(fuzzbench_run_fuzzer_parser) + fuzzbench_run_fuzzer_parser.add_argument( + '--corpus-dir', help='directory to store corpus for the fuzz target') + fuzzbench_run_fuzzer_parser.add_argument( + 'project', help='name of the project or path (external)') + fuzzbench_run_fuzzer_parser.add_argument('fuzzer_name', + help='name of the fuzzer') + fuzzbench_run_fuzzer_parser.add_argument( + 'fuzzer_args', help='arguments to pass to the fuzzer', nargs='*') + + fuzzbench_measure_parser = subparsers.add_parser('fuzzbench_measure') + fuzzbench_measure_parser.add_argument( + 'project', help='name of the project or path (external)') + fuzzbench_measure_parser.add_argument('engine_name', + help='name of the fuzzer') + fuzzbench_measure_parser.add_argument('fuzz_target_name', + help='name of the fuzzer') + + coverage_parser = subparsers.add_parser( + 'coverage', help='Generate code coverage report for the project.') + coverage_parser.add_argument('--no-corpus-download', + action='store_true', + help='do not download corpus backup from ' + 'OSS-Fuzz; use corpus located in ' + 'build/corpus///') + 
coverage_parser.add_argument('--no-serve', + action='store_true', + help='do not serve a local HTTP server.') + coverage_parser.add_argument('--port', + default='8008', + help='specify port for' + ' a local HTTP server rendering coverage report') + coverage_parser.add_argument('--fuzz-target', + help='specify name of a fuzz ' + 'target to be run for generating coverage ' + 'report') + coverage_parser.add_argument('--corpus-dir', + help='specify location of corpus' + ' to be used (requires --fuzz-target argument)') + coverage_parser.add_argument('--public', + action='store_true', + help='if set, will download public ' + 'corpus using wget') + coverage_parser.add_argument('project', + help='name of the project or path (external)') + coverage_parser.add_argument('extra_args', + help='additional arguments to ' + 'pass to llvm-cov utility.', + nargs='*') + _add_external_project_args(coverage_parser) + _add_architecture_args(coverage_parser) + _add_base_image_tag_args(coverage_parser) + + introspector_parser = subparsers.add_parser( + 'introspector', + help='Run a complete end-to-end run of ' + 'fuzz introspector. This involves (1) ' + 'building the fuzzers with ASAN; (2) ' + 'running all fuzzers; (3) building ' + 'fuzzers with coverge; (4) extracting ' + 'coverage; (5) building fuzzers using ' + 'introspector') + introspector_parser.add_argument('project', help='name of the project') + introspector_parser.add_argument('--seconds', + help='number of seconds to run fuzzers', + default=10) + introspector_parser.add_argument('source_path', + help='path of local source', + nargs='?') + introspector_parser.add_argument( + '--public-corpora', + help='if specified, will use public corpora for code coverage', + default=False, + action='store_true') + introspector_parser.add_argument( + '--private-corpora', + help='if specified, will use private corpora', + default=False, + action='store_true') + introspector_parser.add_argument( + '--coverage-only', + action='store_true', + help='if specified, will only collect coverage.') + + download_corpora_parser = subparsers.add_parser( + 'download_corpora', help='Download all corpora for a project.') + download_corpora_parser.add_argument('--fuzz-target', + nargs='+', + help='specify name of a fuzz target') + download_corpora_parser.add_argument('--public', + action='store_true', + help='if set, will download public ' + 'corpus using wget') + download_corpora_parser.add_argument( + 'project', help='name of the project or path (external)') + + reproduce_parser = subparsers.add_parser('reproduce', + help='Reproduce a crash.') + reproduce_parser.add_argument('--valgrind', + action='store_true', + help='run with valgrind') + reproduce_parser.add_argument('project', + help='name of the project or path (external)') + reproduce_parser.add_argument('fuzzer_name', help='name of the fuzzer') + reproduce_parser.add_argument('testcase_path', help='path of local testcase') + reproduce_parser.add_argument('fuzzer_args', + help='arguments to pass to the fuzzer', + nargs='*') + _add_environment_args(reproduce_parser) + _add_external_project_args(reproduce_parser) + _add_architecture_args(reproduce_parser) + _add_base_image_tag_args(reproduce_parser) + + shell_parser = subparsers.add_parser( + 'shell', help='Run /bin/bash within the builder container.') + shell_parser.add_argument('project', + help='name of the project or path (external)') + shell_parser.add_argument('source_path', + help='path of local source', + nargs='?') + _add_architecture_args(shell_parser) + 
_add_engine_args(shell_parser) + _add_sanitizer_args(shell_parser) + _add_environment_args(shell_parser) + _add_external_project_args(shell_parser) + _add_base_image_tag_args(shell_parser) + + run_clusterfuzzlite_parser = subparsers.add_parser( + 'run_clusterfuzzlite', help='Run ClusterFuzzLite on a project.') + _add_sanitizer_args(run_clusterfuzzlite_parser) + _add_environment_args(run_clusterfuzzlite_parser) + run_clusterfuzzlite_parser.add_argument('project') + run_clusterfuzzlite_parser.add_argument('--clean', + dest='clean', + action='store_true', + help='clean existing artifacts.') + run_clusterfuzzlite_parser.add_argument( + '--no-clean', + dest='clean', + action='store_false', + help='do not clean existing artifacts ' + '(default).') + run_clusterfuzzlite_parser.add_argument('--branch', + default='master', + required=True) + _add_external_project_args(run_clusterfuzzlite_parser) + run_clusterfuzzlite_parser.set_defaults(clean=False) + + subparsers.add_parser('pull_images', help='Pull base images.') + return parser + + +def is_base_image(image_name): + """Checks if the image name is a base image.""" + return os.path.exists(os.path.join('infra', 'base-images', image_name)) + + +def check_project_exists(project): + """Checks if a project exists.""" + if os.path.exists(project.path): + return True + + if project.is_external: + descriptive_project_name = project.path + else: + descriptive_project_name = project.name + + logger.error('"%s" does not exist.', descriptive_project_name) + return False + + +def _check_fuzzer_exists(project, fuzzer_name, args, architecture='x86_64'): + """Checks if a fuzzer exists.""" + platform = 'linux/arm64' if architecture == 'aarch64' else 'linux/amd64' + command = ['docker', 'run', '--rm', '--platform', platform] + command.extend(['-v', '%s:/out' % project.out]) + command.append(_get_base_runner_image(args)) + + command.extend(['/bin/bash', '-c', 'test -f /out/%s' % fuzzer_name]) + + try: + subprocess.check_call(command) + except subprocess.CalledProcessError: + logger.error('%s does not seem to exist. Please run build_fuzzers first.', + fuzzer_name) + return False + + return True + + +def _normalized_name(name): + """Return normalized name with special chars like slash, colon, etc normalized + to hyphen(-). 
This is important as otherwise these chars break local and cloud + storage paths.""" + return SPECIAL_CHARS_REGEX.sub('-', name).strip('-') + + +def _get_absolute_path(path): + """Returns absolute path with user expansion.""" + return os.path.abspath(os.path.expanduser(path)) + + +def _get_command_string(command): + """Returns a shell escaped command string.""" + return ' '.join(shlex.quote(part) for part in command) + + +def _get_project_build_subdir(project, subdir_name): + """Creates the |subdir_name| subdirectory of the |project| subdirectory in + |BUILD_DIR| and returns its path.""" + directory = os.path.join(BUILD_DIR, subdir_name, project) + os.makedirs(directory, exist_ok=True) + + return directory + + +def _get_out_dir(project=''): + """Creates and returns path to /out directory for the given project (if + specified).""" + return _get_project_build_subdir(project, 'out') + + +def _add_architecture_args(parser, choices=None): + """Adds common architecture args.""" + if choices is None: + choices = constants.ARCHITECTURES + parser.add_argument('--architecture', + default=constants.DEFAULT_ARCHITECTURE, + choices=choices) + + +def _add_engine_args(parser, choices=None): + """Adds common engine args.""" + if choices is None: + choices = constants.ENGINES + parser.add_argument('--engine', + default=constants.DEFAULT_ENGINE, + choices=choices) + + +def _add_sanitizer_args(parser, choices=None): + """Adds common sanitizer args.""" + if choices is None: + choices = constants.SANITIZERS + parser.add_argument('--sanitizer', + default=None, + choices=choices, + help='the default is "address"') + + +def _add_environment_args(parser): + """Adds common environment args.""" + parser.add_argument('-e', + action='append', + help="set environment variable e.g. VAR=value") + + +def _add_base_image_tag_args(parser): + """Adds base image tag arg.""" + parser.add_argument('--base-image-tag', + help='The tag of the base-runner image to use.') + + +def build_image_impl(project, cache=True, pull=False, architecture='x86_64'): + """Builds image.""" + image_name = project.name + + if is_base_image(image_name): + image_project = 'oss-fuzz-base' + docker_build_dir = os.path.join(OSS_FUZZ_DIR, 'infra', 'base-images', + image_name) + dockerfile_path = os.path.join(docker_build_dir, 'Dockerfile') + else: + if not check_project_exists(project): + return False + dockerfile_path = project.dockerfile_path + docker_build_dir = project.path + image_project = 'oss-fuzz' + + if pull and not pull_images(project.language): + return False + + build_args = [] + image_name = 'gcr.io/%s/%s' % (image_project, image_name) + if architecture == 'aarch64': + build_args += [ + 'buildx', + 'build', + '--platform', + 'linux/arm64', + '--progress', + 'plain', + '--load', + ] + if not cache: + build_args.append('--no-cache') + + build_args += ['-t', image_name, '--file', dockerfile_path] + build_args.append(docker_build_dir) + + if architecture == 'aarch64': + command = ['docker'] + build_args + subprocess.check_call(command) + return True + return docker_build(build_args) + + +def _env_to_docker_args(env_list): + """Turns envirnoment variable list into docker arguments.""" + return sum([['-e', v] for v in env_list], []) + + +def workdir_from_lines(lines, default='/src'): + """Gets the WORKDIR from the given lines.""" + for line in reversed(lines): # reversed to get last WORKDIR. 
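+    # Resolution below: '$SRC' is rewritten to '/src' and relative paths are
+    # joined onto '/src', so e.g. 'WORKDIR $SRC/proj' and 'WORKDIR proj'
+    # (project name is illustrative) both resolve to '/src/proj'.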
+ match = re.match(WORKDIR_REGEX, line) + if match: + workdir = match.group(1) + workdir = workdir.replace('$SRC', '/src') + + if not os.path.isabs(workdir): + workdir = os.path.join('/src', workdir) + + return os.path.normpath(workdir) + + return default + + +def _workdir_from_dockerfile(project): + """Parses WORKDIR from the Dockerfile for the given project.""" + with open(project.dockerfile_path) as file_handle: + lines = file_handle.readlines() + + return workdir_from_lines(lines, default=os.path.join('/src', project.name)) + + +def prepare_aarch64_emulation(): + """Run some necessary commands to use buildx to build AArch64 targets using + QEMU emulation on an x86_64 host.""" + subprocess.check_call( + ['docker', 'buildx', 'create', '--name', ARM_BUILDER_NAME]) + subprocess.check_call(['docker', 'buildx', 'use', ARM_BUILDER_NAME]) + + +def docker_run(run_args, *, print_output=True, architecture='x86_64'): + """Calls `docker run`.""" + platform = 'linux/arm64' if architecture == 'aarch64' else 'linux/amd64' + command = [ + 'docker', 'run', '--privileged', '--shm-size=2g', '--platform', platform + ] + if os.getenv('OSS_FUZZ_SAVE_CONTAINERS_NAME'): + command.append('--name') + command.append(os.getenv('OSS_FUZZ_SAVE_CONTAINERS_NAME')) + else: + command.append('--rm') + + # Support environments with a TTY. + if sys.stdin.isatty(): + command.append('-i') + + command.extend(run_args) + + logger.info('Running: %s.', _get_command_string(command)) + stdout = None + if not print_output: + stdout = open(os.devnull, 'w') + + try: + subprocess.check_call(command, stdout=stdout, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError: + return False + + return True + + +def docker_build(build_args): + """Calls `docker build`.""" + command = ['docker', 'build'] + command.extend(build_args) + logger.info('Running: %s.', _get_command_string(command)) + + try: + subprocess.check_call(command) + except subprocess.CalledProcessError: + logger.error('Docker build failed.') + return False + + return True + + +def docker_pull(image): + """Call `docker pull`.""" + command = ['docker', 'pull', image] + logger.info('Running: %s', _get_command_string(command)) + + try: + subprocess.check_call(command) + except subprocess.CalledProcessError: + logger.error('Docker pull failed.') + return False + + return True + + +def build_image(args): + """Builds docker image.""" + if args.pull and args.no_pull: + logger.error('Incompatible arguments --pull and --no-pull.') + return False + + if args.pull: + pull = True + elif args.no_pull: + pull = False + else: + y_or_n = raw_input('Pull latest base images (compiler/runtime)? (y/N): ') + pull = y_or_n.lower() == 'y' + + if pull: + logger.info('Pulling latest base images...') + else: + logger.info('Using cached base images...') + + # If build_image is called explicitly, don't use cache. 
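+  # (In practice the Docker cache is only used when --cache was passed; the
+  # flag defaults to False in the argument parser above.)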
+ if build_image_impl(args.project, + cache=args.cache, + pull=pull, + architecture=args.architecture): + return True + + return False + + +def build_fuzzers_impl( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches + project, + clean, + engine, + sanitizer, + architecture, + env_to_add, + source_path, + mount_path=None, + child_dir='', + build_project_image=True, + docker_args=None): + """Builds fuzzers.""" + if build_project_image and not build_image_impl(project, + architecture=architecture): + return False + + project_out = os.path.join(project.out, child_dir) + if clean: + logger.info('Cleaning existing build artifacts.') + + # Clean old and possibly conflicting artifacts in project's out directory. + docker_run([ + '-v', f'{project_out}:/out', '-t', f'gcr.io/oss-fuzz/{project.name}', + '/bin/bash', '-c', 'rm -rf /out/*' + ], + architecture=architecture) + + docker_run([ + '-v', + '%s:/work' % project.work, '-t', + 'gcr.io/oss-fuzz/%s' % project.name, '/bin/bash', '-c', 'rm -rf /work/*' + ], + architecture=architecture) + + else: + logger.info('Keeping existing build artifacts as-is (if any).') + env = [ + 'FUZZING_ENGINE=' + engine, + 'SANITIZER=' + sanitizer, + 'ARCHITECTURE=' + architecture, + 'PROJECT_NAME=' + project.name, + 'HELPER=True', + ] + + _add_oss_fuzz_ci_if_needed(env) + + if project.language: + env.append('FUZZING_LANGUAGE=' + project.language) + + if env_to_add: + env += env_to_add + + command = _env_to_docker_args(env) + if source_path: + workdir = _workdir_from_dockerfile(project) + if mount_path: + command += [ + '-v', + '%s:%s' % (_get_absolute_path(source_path), mount_path), + ] + else: + if workdir == '/src': + logger.error('Cannot use local checkout with "WORKDIR: /src".') + return False + + command += [ + '-v', + '%s:%s' % (_get_absolute_path(source_path), workdir), + ] + + if docker_args: + command += docker_args + + command += [ + '-v', f'{project_out}:/out', '-v', f'{project.work}:/work', + f'gcr.io/oss-fuzz/{project.name}' + ] + if sys.stdin.isatty(): + command.insert(-1, '-t') + + result = docker_run(command, architecture=architecture) + if not result: + logger.error('Building fuzzers failed.') + return False + + return True + + +def run_clusterfuzzlite(args): + """Runs ClusterFuzzLite on a local repo.""" + if not os.path.exists(CLUSTERFUZZLITE_FILESTORE_DIR): + os.mkdir(CLUSTERFUZZLITE_FILESTORE_DIR) + + try: + with tempfile.TemporaryDirectory() as workspace: + + if args.external: + project_src_path = os.path.join(workspace, args.project.name) + shutil.copytree(args.project.path, project_src_path) + + build_command = [ + '--tag', 'gcr.io/oss-fuzz-base/cifuzz-run-fuzzers', '--file', + 'infra/run_fuzzers.Dockerfile', 'infra' + ] + if not docker_build(build_command): + return False + filestore_path = os.path.abspath(CLUSTERFUZZLITE_FILESTORE_DIR) + docker_run_command = [] + if args.external: + docker_run_command += [ + '-e', + f'PROJECT_SRC_PATH={project_src_path}', + ] + else: + docker_run_command += [ + '-e', + f'OSS_FUZZ_PROJECT_NAME={args.project.name}', + ] + docker_run_command += [ + '-v', + f'{filestore_path}:{filestore_path}', + '-v', + f'{workspace}:{workspace}', + '-e', + f'FILESTORE_ROOT_DIR={filestore_path}', + '-e', + f'WORKSPACE={workspace}', + '-e', + f'REPOSITORY={args.project.name}', + '-e', + 'CFL_PLATFORM=standalone', + '--entrypoint', + '', + '-v', + '/var/run/docker.sock:/var/run/docker.sock', + CLUSTERFUZZLITE_DOCKER_IMAGE, + 'python3', + '/opt/oss-fuzz/infra/cifuzz/cifuzz_combined_entrypoint.py', + ] + return 
docker_run(docker_run_command) + + except PermissionError as error: + logger.error('PermissionError: %s.', error) + # Tempfile can't delete the workspace because of a permissions issue. This + # is because docker creates files in the workspace that are owned by root + # but this process is probably being run as another user. Use a docker image + # to delete the temp directory (workspace) so that we have permission. + docker_run([ + '-v', f'{workspace}:{workspace}', '--entrypoint', '', + CLUSTERFUZZLITE_DOCKER_IMAGE, 'rm', '-rf', + os.path.join(workspace, '*') + ]) + return False + + +def build_fuzzers(args): + """Builds fuzzers.""" + if args.engine == 'centipede' and args.sanitizer != 'none': + # Centipede always requires separate binaries for sanitizers: + # An unsanitized binary, which Centipede requires for fuzzing. + # A sanitized binary, placed in the child directory. + sanitized_binary_directories = ( + ('none', ''), + (args.sanitizer, f'__centipede_{args.sanitizer}'), + ) + else: + # Generally, a fuzzer only needs one sanitized binary in the default dir. + sanitized_binary_directories = ((args.sanitizer, ''),) + return all( + build_fuzzers_impl(args.project, + args.clean, + args.engine, + sanitizer, + args.architecture, + args.e, + args.source_path, + mount_path=args.mount_path, + child_dir=child_dir, + docker_args=args.docker_arg) + for sanitizer, child_dir in sanitized_binary_directories) + + +def fuzzbench_build_fuzzers(args): + """Builds fuzz targets with an arbitrary fuzzer from FuzzBench.""" + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = os.path.abspath(tmp_dir) + fuzzbench_path = os.path.join(tmp_dir, 'fuzzbench') + subprocess.run([ + 'git', 'clone', 'https://github.com/google/fuzzbench', '--depth', '1', + fuzzbench_path + ], + check=True) + env = [ + f'FUZZBENCH_PATH={fuzzbench_path}', 'OSS_FUZZ_ON_DEMAND=1', + f'PROJECT={args.project.name}' + ] + tag = f'gcr.io/oss-fuzz/{args.project.name}' + subprocess.run([ + 'docker', 'tag', 'gcr.io/oss-fuzz-base/base-builder-fuzzbench', + 'gcr.io/oss-fuzz-base/base-builder' + ], + check=True) + build_image_impl(args.project) + assert docker_build([ + '--tag', tag, '--build-arg', f'parent_image={tag}', '--file', + os.path.join(fuzzbench_path, 'fuzzers', args.engine, + 'builder.Dockerfile'), + os.path.join(fuzzbench_path, 'fuzzers', args.engine) + ]) + + return build_fuzzers_impl(args.project, + False, + args.engine, + args.sanitizer, + args.architecture, + env, + source_path=fuzzbench_path, + mount_path=fuzzbench_path, + build_project_image=False) + + +def _add_oss_fuzz_ci_if_needed(env): + """Adds value of |OSS_FUZZ_CI| environment variable to |env| if it is set.""" + oss_fuzz_ci = os.getenv('OSS_FUZZ_CI') + if oss_fuzz_ci: + env.append('OSS_FUZZ_CI=' + oss_fuzz_ci) + + +def check_build(args): + """Checks that fuzzers in the container execute without errors.""" + # Access the property to trigger validation early. 
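+  # Reading it parses project.yaml (see Project.base_os_version), so problems
+  # with the project metadata surface before any containers are started.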
+ _ = args.project.base_os_version + if not check_project_exists(args.project): + return False + + if (args.fuzzer_name and not _check_fuzzer_exists( + args.project, args.fuzzer_name, args, args.architecture)): + return False + + env = [ + 'FUZZING_ENGINE=' + args.engine, + 'SANITIZER=' + args.sanitizer, + 'ARCHITECTURE=' + args.architecture, + 'FUZZING_LANGUAGE=' + args.project.language, + 'HELPER=True', + ] + _add_oss_fuzz_ci_if_needed(env) + if args.e: + env += args.e + + run_args = _env_to_docker_args(env) + [ + '-v', f'{args.project.out}:/out', '-t', + _get_base_runner_image(args) + ] + + if args.fuzzer_name: + run_args += ['test_one.py', args.fuzzer_name] + else: + run_args.append('test_all.py') + + result = docker_run(run_args, architecture=args.architecture) + if result: + logger.info('Check build passed.') + else: + logger.error('Check build failed.') + + return result + + +def _get_fuzz_targets(project): + """Returns names of fuzz targest build in the project's /out directory.""" + fuzz_targets = [] + for name in os.listdir(project.out): + if name.startswith('afl-'): + continue + if name == 'centipede': + continue + if name.startswith('jazzer_'): + continue + if name == 'llvm-symbolizer': + continue + + path = os.path.join(project.out, name) + # Python and JVM fuzz targets are only executable for the root user, so + # we can't use os.access. + if os.path.isfile(path) and (os.stat(path).st_mode & 0o111): + fuzz_targets.append(name) + + return fuzz_targets + + +def _get_latest_corpus(project, fuzz_target, base_corpus_dir): + """Downloads the latest corpus for the given fuzz target.""" + corpus_dir = os.path.join(base_corpus_dir, fuzz_target) + os.makedirs(corpus_dir, exist_ok=True) + + if not fuzz_target.startswith(project.name + '_'): + fuzz_target = '%s_%s' % (project.name, fuzz_target) + + # Normalise fuzz target name. + fuzz_target = _normalized_name(fuzz_target) + + corpus_backup_url = CORPUS_BACKUP_URL_FORMAT.format(project_name=project.name, + fuzz_target=fuzz_target) + command = ['gsutil', 'ls', corpus_backup_url] + + # Don't capture stderr. We want it to print in real time, in case gsutil is + # asking for two-factor authentication. + corpus_listing = subprocess.Popen(command, stdout=subprocess.PIPE) + output, _ = corpus_listing.communicate() + + # Some fuzz targets (e.g. new ones) may not have corpus yet, just skip those. + if corpus_listing.returncode: + logger.warning('Corpus for %s not found:\n', fuzz_target) + return + + if output: + latest_backup_url = output.splitlines()[-1] + archive_path = corpus_dir + '.zip' + command = ['gsutil', '-q', 'cp', latest_backup_url, archive_path] + subprocess.check_call(command) + + command = ['unzip', '-q', '-o', archive_path, '-d', corpus_dir] + subprocess.check_call(command) + os.remove(archive_path) + else: + # Sync the working corpus copy if a minimized backup is not available. 
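+    # Illustrative shape of the fallback sync (URL comes from CORPUS_URL_FORMAT):
+    #   gsutil -m -q rsync -R gs://<project>-corpus.clusterfuzz-external.appspot.com/libFuzzer/<target>/ <corpus_dir>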
+ corpus_url = CORPUS_URL_FORMAT.format(project_name=project.name, + fuzz_target=fuzz_target) + command = ['gsutil', '-m', '-q', 'rsync', '-R', corpus_url, corpus_dir] + subprocess.check_call(command) + + +def _get_latest_public_corpus(args, fuzzer): + """Downloads the public corpus""" + target_corpus_dir = "build/corpus/%s" % args.project.name + if not os.path.isdir(target_corpus_dir): + os.makedirs(target_corpus_dir) + + target_zip = os.path.join(target_corpus_dir, fuzzer + ".zip") + + project_qualified_fuzz_target_name = fuzzer + qualified_name_prefix = args.project.name + '_' + if not fuzzer.startswith(qualified_name_prefix): + project_qualified_fuzz_target_name = qualified_name_prefix + fuzzer + + download_url = HTTPS_CORPUS_BACKUP_URL_FORMAT.format( + project_name=args.project.name, + fuzz_target=project_qualified_fuzz_target_name) + + cmd = ['wget', download_url, '-O', target_zip] + try: + with open(os.devnull, 'w') as stdout: + subprocess.check_call(cmd, stdout=stdout) + except OSError: + logger.error('Failed to download corpus') + + target_fuzzer_dir = os.path.join(target_corpus_dir, fuzzer) + if not os.path.isdir(target_fuzzer_dir): + os.mkdir(target_fuzzer_dir) + + target_corpus_dir = os.path.join(target_corpus_dir, fuzzer) + try: + with open(os.devnull, 'w') as stdout: + subprocess.check_call( + ['unzip', '-q', '-o', target_zip, '-d', target_fuzzer_dir], + stdout=stdout) + except OSError: + logger.error('Failed to unzip corpus') + + # Remove the downloaded zip + os.remove(target_zip) + return True + + +def download_corpora(args): + """Downloads most recent corpora from GCS for the given project.""" + if not check_project_exists(args.project): + return False + + if args.public: + logger.info("Downloading public corpus") + try: + with open(os.devnull, 'w') as stdout: + subprocess.check_call(['wget', '--version'], stdout=stdout) + except OSError: + logger.error('wget not found') + return False + else: + try: + with open(os.devnull, 'w') as stdout: + subprocess.check_call(['gsutil', '--version'], stdout=stdout) + except OSError: + logger.error('gsutil not found. Please install it from ' + 'https://cloud.google.com/storage/docs/gsutil_install') + return False + + if args.fuzz_target: + fuzz_targets = args.fuzz_target + else: + fuzz_targets = _get_fuzz_targets(args.project) + + if not fuzz_targets: + logger.error( + 'Fuzz targets not found. 
Please build project first ' + '(python3 infra/helper.py build_fuzzers %s) so that download_corpora ' + 'can automatically identify targets.', args.project.name) + return False + + corpus_dir = args.project.corpus + + def _download_for_single_target(fuzz_target): + try: + if args.public: + _get_latest_public_corpus(args, fuzz_target) + else: + _get_latest_corpus(args.project, fuzz_target, corpus_dir) + return True + except Exception as error: # pylint:disable=broad-except + logger.error('Corpus download for %s failed: %s.', fuzz_target, + str(error)) + return False + + logger.info('Downloading corpora for %s project to %s.', args.project.name, + corpus_dir) + thread_pool = ThreadPool() + return all(thread_pool.map(_download_for_single_target, fuzz_targets)) + + +def coverage(args): # pylint: disable=too-many-branches + """Generates code coverage using clang source based code coverage.""" + if args.corpus_dir and not args.fuzz_target: + logger.error( + '--corpus-dir requires specifying a particular fuzz target using ' + '--fuzz-target') + return False + + if not check_project_exists(args.project): + return False + + if args.project.language not in constants.LANGUAGES_WITH_COVERAGE_SUPPORT: + logger.error( + 'Project is written in %s, coverage for it is not supported yet.', + args.project.language) + return False + + if (not args.no_corpus_download and not args.corpus_dir and + not args.project.is_external): + if not download_corpora(args): + return False + + extra_cov_args = ( + f'{args.project.coverage_extra_args.strip()} {" ".join(args.extra_args)}') + env = [ + 'FUZZING_ENGINE=libfuzzer', + 'HELPER=True', + 'FUZZING_LANGUAGE=%s' % args.project.language, + 'PROJECT=%s' % args.project.name, + 'SANITIZER=coverage', + 'COVERAGE_EXTRA_ARGS=%s' % extra_cov_args, + 'ARCHITECTURE=' + args.architecture, + ] + + if not args.no_serve: + env.append(f'HTTP_PORT={args.port}') + + run_args = _env_to_docker_args(env) + + if args.port: + run_args.extend([ + '-p', + '%s:%s' % (args.port, args.port), + ]) + + if args.corpus_dir: + if not os.path.exists(args.corpus_dir): + logger.error('The path provided in --corpus-dir argument does not ' + 'exist.') + return False + corpus_dir = os.path.realpath(args.corpus_dir) + run_args.extend(['-v', '%s:/corpus/%s' % (corpus_dir, args.fuzz_target)]) + else: + run_args.extend(['-v', '%s:/corpus' % args.project.corpus]) + + run_args.extend([ + '-v', + '%s:/out' % args.project.out, + '-t', + _get_base_runner_image(args), + ]) + + run_args.append('coverage') + if args.fuzz_target: + run_args.append(args.fuzz_target) + + result = docker_run(run_args, architecture=args.architecture) + if result: + logger.info('Successfully generated clang code coverage report.') + else: + logger.error('Failed to generate clang code coverage report.') + + return result + + +def _introspector_prepare_corpus(args): + """Helper function for introspector runs to generate corpora.""" + parser = get_parser() + # Generate corpus, either by downloading or running fuzzers. + if args.private_corpora or args.public_corpora: + corpora_command = ['download_corpora'] + if args.public_corpora: + corpora_command.append('--public') + corpora_command.append(args.project.name) + if not download_corpora(parse_args(parser, corpora_command)): + logger.error('Failed to download corpora') + return False + else: + fuzzer_targets = _get_fuzz_targets(args.project) + for fuzzer_name in fuzzer_targets: + # Make a corpus directory. 
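+      # run_fuzzer below is pointed at this directory via --corpus-dir, so the
+      # inputs it generates are reused by the later coverage step.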
+ fuzzer_corpus_dir = args.project.corpus + f'/{fuzzer_name}' + if not os.path.isdir(fuzzer_corpus_dir): + os.makedirs(fuzzer_corpus_dir) + run_fuzzer_command = [ + 'run_fuzzer', '--sanitizer', 'address', '--corpus-dir', + fuzzer_corpus_dir, args.project.name, fuzzer_name + ] + + parsed_args = parse_args(parser, run_fuzzer_command) + parsed_args.fuzzer_args = [ + f'-max_total_time={args.seconds}', '-detect_leaks=0' + ] + # Continue even if run command fails, because we do not have 100% + # accuracy in fuzz target detection, i.e. we might try to run something + # that is not a target. + run_fuzzer(parsed_args) + return True + + +def introspector(args): + """Runs a complete end-to-end run of introspector.""" + parser = get_parser() + + args_to_append = [] + if args.source_path: + args_to_append.append(_get_absolute_path(args.source_path)) + + # Build fuzzers with ASAN. + build_fuzzers_command = [ + 'build_fuzzers', '--sanitizer=address', args.project.name + ] + args_to_append + if not build_fuzzers(parse_args(parser, build_fuzzers_command)): + logger.error('Failed to build project with ASAN') + return False + + if not _introspector_prepare_corpus(args): + return False + + # Build code coverage. + build_fuzzers_command = [ + 'build_fuzzers', '--sanitizer=coverage', args.project.name + ] + args_to_append + if not build_fuzzers(parse_args(parser, build_fuzzers_command)): + logger.error('Failed to build project with coverage instrumentation') + return False + + # Collect coverage. + coverage_command = [ + 'coverage', '--no-corpus-download', '--port', '', args.project.name + ] + if not coverage(parse_args(parser, coverage_command)): + logger.error('Failed to extract coverage') + return False + + logger.info('Coverage collected for %s', args.project.name) + if args.coverage_only: + logger.info('Coverage-only enabled, finishing now.') + return True + + # Build introspector. + build_fuzzers_command = [ + 'build_fuzzers', '--sanitizer=introspector', args.project.name + ] + args_to_append + if not build_fuzzers(parse_args(parser, build_fuzzers_command)): + logger.error('Failed to build project with introspector') + return False + + introspector_dst = os.path.join(args.project.out, + "introspector-report/inspector") + shutil.rmtree(introspector_dst, ignore_errors=True) + shutil.copytree(os.path.join(args.project.out, "inspector"), introspector_dst) + + # Copy the coverage reports into the introspector report. + dst_cov_report = os.path.join(introspector_dst, "covreport") + shutil.copytree(os.path.join(args.project.out, "report"), dst_cov_report) + + # Copy per-target coverage reports + src_target_cov_report = os.path.join(args.project.out, "report_target") + for target_cov_dir in os.listdir(src_target_cov_report): + dst_target_cov_report = os.path.join(dst_cov_report, target_cov_dir) + shutil.copytree(os.path.join(src_target_cov_report, target_cov_dir), + dst_target_cov_report) + + logger.info('Introspector run complete. 
Report in %s', introspector_dst) + logger.info( + 'To browse the report, run: `python3 -m http.server 8008 --directory %s`' + 'and navigate to localhost:8008/fuzz_report.html in your browser', + introspector_dst) + return True + + +def run_fuzzer(args): + """Runs a fuzzer in the container.""" + if not check_project_exists(args.project): + return False + + if not _check_fuzzer_exists(args.project, args.fuzzer_name, args, + args.architecture): + return False + + env = [ + 'FUZZING_ENGINE=' + args.engine, + 'SANITIZER=' + args.sanitizer, + 'RUN_FUZZER_MODE=interactive', + 'HELPER=True', + ] + + if args.e: + env += args.e + + run_args = _env_to_docker_args(env) + + if args.corpus_dir: + if not os.path.exists(args.corpus_dir): + logger.error('The path provided in --corpus-dir argument does not exist') + return False + corpus_dir = os.path.realpath(args.corpus_dir) + run_args.extend([ + '-v', + '{corpus_dir}:/tmp/{fuzzer}_corpus'.format(corpus_dir=corpus_dir, + fuzzer=args.fuzzer_name) + ]) + + run_args.extend([ + '-v', + '%s:/out' % args.project.out, + '-t', + _get_base_runner_image(args), + 'run_fuzzer', + args.fuzzer_name, + ] + args.fuzzer_args) + + return docker_run(run_args, architecture=args.architecture) + + +def fuzzbench_run_fuzzer(args): + """Runs a fuzz target built by fuzzbench in the container.""" + if not check_project_exists(args.project): + return False + + env = [ + 'FUZZING_ENGINE=' + args.engine, + 'SANITIZER=' + args.sanitizer, + 'RUN_FUZZER_MODE=interactive', + 'HELPER=True', + f'FUZZ_TARGET={args.fuzzer_name}', + f'BENCHMARK={args.project.name}', + 'TRIAL_ID=1', + 'EXPERIMENT_TYPE=bug', + ] + + if args.e: + env += args.e + + run_args = _env_to_docker_args(env) + + if args.corpus_dir: + if not os.path.exists(args.corpus_dir): + logger.error('The path provided in --corpus-dir argument does not exist') + return False + corpus_dir = os.path.realpath(args.corpus_dir) + run_args.extend([ + '-v', + '{corpus_dir}:/tmp/{fuzzer}_corpus'.format(corpus_dir=corpus_dir, + fuzzer=args.fuzzer_name) + ]) + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = os.path.abspath(tmp_dir) + fuzzbench_path = os.path.join(tmp_dir, 'fuzzbench') + subprocess.run([ + 'git', 'clone', 'https://github.com/google/fuzzbench', '--depth', '1', + fuzzbench_path + ], + check=True) + run_args.extend([ + '-v', + f'{args.project.out}:/out', + '-v', + f'{fuzzbench_path}:{fuzzbench_path}', + '-e', + f'FUZZBENCH_PATH={fuzzbench_path}', + f'gcr.io/oss-fuzz/{args.project.name}', + 'fuzzbench_run_fuzzer', + args.fuzzer_name, + ] + args.fuzzer_args) + + return docker_run(run_args, architecture=args.architecture) + + +def fuzzbench_measure(args): + """Measure results from fuzzing with fuzzbench.""" + if not check_project_exists(args.project): + return False + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = os.path.abspath(tmp_dir) + fuzzbench_path = os.path.join(tmp_dir, 'fuzzbench') + subprocess.run([ + 'git', 'clone', 'https://github.com/google/fuzzbench', '--depth', '1', + fuzzbench_path + ], + check=True) + run_args = [ + '-v', f'{args.project.out}:/out', '-v', + f'{fuzzbench_path}:{fuzzbench_path}', '-e', + f'FUZZBENCH_PATH={fuzzbench_path}', '-e', 'EXPERIMENT_TYPE=bug', '-e', + f'FUZZ_TARGET={args.fuzz_target_name}', '-e', + f'FUZZER={args.engine_name}', '-e', f'BENCHMARK={args.project.name}', + f'gcr.io/oss-fuzz/{args.project.name}', 'fuzzbench_measure' + ] + + return docker_run(run_args, architecture='x86_64') + + +def reproduce(args): + """Reproduces a specific test case from a specific 
project.""" + return reproduce_impl(args.project, args.fuzzer_name, args.valgrind, args.e, + args.fuzzer_args, args.testcase_path, args, + args.architecture) + + +def reproduce_impl( # pylint: disable=too-many-arguments + project, + fuzzer_name, + valgrind, + env_to_add, + fuzzer_args, + testcase_path, + args, + architecture='x86_64', + run_function=docker_run): + """Reproduces a specific test case.""" + if not check_project_exists(project): + return False + + if not _check_fuzzer_exists(project, fuzzer_name, args, architecture): + return False + + debugger = '' + env = ['HELPER=True', 'ARCHITECTURE=' + architecture] + use_debug_image = bool(valgrind) + image_name = _get_base_runner_image(args, debug=use_debug_image) + + if valgrind: + debugger = 'valgrind --tool=memcheck --track-origins=yes --leak-check=full' + + if debugger: + env += ['DEBUGGER=' + debugger] + + if env_to_add: + env.extend(env_to_add) + + run_args = _env_to_docker_args(env) + [ + '-v', + '%s:/out' % project.out, + '-v', + '%s:/testcase' % _get_absolute_path(testcase_path), + '-t', + image_name, + 'reproduce', + fuzzer_name, + '-runs=100', + ] + fuzzer_args + + return run_function(run_args, architecture=architecture) + + +def _validate_project_name(project_name): + """Validates |project_name| is a valid OSS-Fuzz project name.""" + if len(project_name) > MAX_PROJECT_NAME_LENGTH: + logger.error( + 'Project name needs to be less than or equal to %d characters.', + MAX_PROJECT_NAME_LENGTH) + return False + + if not VALID_PROJECT_NAME_REGEX.match(project_name): + logger.info('Invalid project name: %s.', project_name) + return False + + return True + + +def _validate_language(language): + if not LANGUAGE_REGEX.match(language): + logger.error('Invalid project language %s.', language) + return False + + return True + + +def _create_build_integration_directory(directory): + """Returns True on successful creation of a build integration directory. + Suitable for OSS-Fuzz and external projects.""" + try: + os.makedirs(directory) + except OSError as error: + if error.errno != errno.EEXIST: + raise + logger.error('%s already exists.', directory) + return False + return True + + +def _template_project_file(filename, template, template_args, directory): + """Templates |template| using |template_args| and writes the result to + |directory|/|filename|. Sets the file to executable if |filename| is + build.sh.""" + file_path = os.path.join(directory, filename) + with open(file_path, 'w') as file_handle: + file_handle.write(template % template_args) + + if filename == 'build.sh': + os.chmod(file_path, 0o755) + + +def generate(args): + """Generates empty project files.""" + return _generate_impl(args.project, args.language) + + +def _get_current_datetime(): + """Returns this year. Needed for mocking.""" + return datetime.datetime.now() + + +def _base_builder_from_language(language): + """Returns the base builder for the specified language.""" + return LANGUAGE_TO_BASE_BUILDER_IMAGE[language] + + +def _generate_impl(project, language): + """Implementation of generate(). Useful for testing.""" + if project.is_external: + # External project. + project_templates = templates.EXTERNAL_TEMPLATES + else: + # Internal project. 
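+    # Internal projects are created under projects/<name> in the OSS-Fuzz
+    # tree, so the name must pass the stricter checks below.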
+ if not _validate_project_name(project.name): + return False + project_templates = templates.TEMPLATES + + if not _validate_language(language): + return False + + directory = project.build_integration_path + if not _create_build_integration_directory(directory): + return False + + logger.info('Writing new files to: %s.', directory) + + template_args = { + 'project_name': project.name, + 'base_builder': _base_builder_from_language(language), + 'language': language, + 'year': _get_current_datetime().year + } + for filename, template in project_templates.items(): + _template_project_file(filename, template, template_args, directory) + return True + + +def index(args): + """Runs the indexer on the project.""" + if not args.project.is_external and not check_project_exists(args.project): + return False + + image_name = f'gcr.io/oss-fuzz/{args.project.name}' + if not build_image_impl( + args.project, cache=True, pull=False, architecture=args.architecture): + logger.error('Failed to build project image for indexer.') + return False + env = [ + f'ARCHITECTURE={args.architecture}', + 'HELPER=True', + f'PROJECT_NAME={args.project.name}', + 'INDEXER_BUILD=1', + ] + if args.e: + env.extend(args.e) + + run_args = _env_to_docker_args(env) + run_args.extend([ + '-v', + f'{args.project.out}:/out', + '-v', + f'{args.project.work}:/work', + '-t', + ]) + + if args.docker_arg: + run_args.extend(args.docker_arg) + + if args.dev: + indexer_dir = os.path.join(OSS_FUZZ_DIR, + 'infra/base-images/base-builder/indexer') + indexer_binary_path = os.path.join(indexer_dir, 'indexer') + if not os.path.exists(indexer_binary_path): + print('Indexer binary does not exist, pulling prebuilt.') + with urllib.request.urlopen(INDEXER_PREBUILT_URL) as resp, \ + open(indexer_binary_path, 'wb') as f: + shutil.copyfileobj(resp, f) + os.chmod(indexer_binary_path, 0o755) + + run_args.extend(['-v', f'{indexer_dir}:/opt/indexer']) + + run_args.append(image_name) + if args.shell: + run_args.append('/bin/bash') + else: + run_args.append('/opt/indexer/index_build.py') + + if args.targets: + run_args.extend(['--targets', args.targets]) + + run_args.extend(args.extra_args) + + logger.info(f'Running indexer for project: {args.project.name}') + result = docker_run(run_args, architecture=args.architecture) + if result: + logger.info('Indexer completed successfully.') + else: + logger.error('Indexer failed.') + + return result + + +def shell(args): + """Runs a shell within a docker image.""" + # Access the property to trigger validation early. 
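+  # Same early project.yaml check as in check_build() above.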
+ _ = args.project.base_os_version + if not build_image_impl(args.project): + return False + + env = [ + 'FUZZING_ENGINE=' + args.engine, 'SANITIZER=' + args.sanitizer, + 'ARCHITECTURE=' + args.architecture, 'HELPER=True', + f'PROJECT_NAME={args.project.name}' + ] + + if args.project.name != 'base-runner-debug': + env.append('FUZZING_LANGUAGE=' + args.project.language) + + if args.e: + env += args.e + + if is_base_image(args.project.name): + image_project = 'oss-fuzz-base' + out_dir = _get_out_dir() + else: + image_project = 'oss-fuzz' + out_dir = args.project.out + + run_args = _env_to_docker_args(env) + if args.source_path: + workdir = _workdir_from_dockerfile(args.project) + run_args.extend([ + '-v', + '%s:%s' % (_get_absolute_path(args.source_path), workdir), + ]) + + run_args.extend([ + '-v', + '%s:/out' % out_dir, '-v', + '%s:/work' % args.project.work, '-t', + 'gcr.io/%s/%s' % (image_project, args.project.name), '/bin/bash' + ]) + + docker_run(run_args, architecture=args.architecture) + return True + + +def pull_images(language=None): + """Pulls base images used to build projects in language lang (or all if lang + is None).""" + for base_image_lang, base_images in BASE_IMAGES.items(): + if (language is None or base_image_lang == 'generic' or + base_image_lang == language): + for base_image in base_images: + if not docker_pull(base_image): + return False + + return True + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/oss-migration.py b/oss-migration.py new file mode 100755 index 0000000000..6c7f2cd93c --- /dev/null +++ b/oss-migration.py @@ -0,0 +1,897 @@ +#!/usr/bin/env python3 +import argparse +import json +import time +import os +import re +import shutil +import sys +import asyncio +import signal +import tempfile +import logging +import random +import string +import datetime +from typing import Dict, Optional, Tuple +import shlex +import subprocess +import traceback + +# Paths (adjust if necessary) +OSS_FUZZ_DIR = '/usr/local/google/home/matheushunsche/projects/oss-fuzz' +CLUSTERFUZZ_DIR = '/usr/local/google/home/matheushunsche/projects/clusterfuzz' +CASP_PYTHONPATH = 'cli/casp/src:src' +BASE_MIGRATION_DIR = '/usr/local/google/home/matheushunsche/projects/oss-migration' + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='\r[%(asctime)s] %(message)s', # Added \r at the beginning + datefmt='%H:%M:%S', + stream=sys.stdout +) +logger = logging.getLogger(__name__) + +# Global list to track active subprocesses for clean termination +active_processes = [] + +def restore_terminal(): + """Restores terminal settings to a sane state.""" + try: + import subprocess + subprocess.run(['stty', 'sane'], check=False) + # Clear any leftover input + if sys.stdin.isatty(): + import termios + termios.tcflush(sys.stdin, termios.TCIFLUSH) + except: + pass + +def force_exit_handler(signum, frame): + # Use print to avoid dependency on logger if it's not ready + print("\nCtrl+C detected! 
Force terminating active processes...", flush=True) + for p in active_processes: + try: + # Try to get process group ID + pgid = os.getpgid(p.pid) + os.killpg(pgid, signal.SIGKILL) + except: + try: + p.terminate() + except: + pass + restore_terminal() + os._exit(1) + +signal.signal(signal.SIGINT, force_exit_handler) +signal.signal(signal.SIGTERM, force_exit_handler) + +def safe_print(message): + logger.info(message) + +def safe_rmtree(path): + if not os.path.exists(path): + return + try: + if os.path.islink(path): + os.unlink(path) + elif os.path.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + except Exception: + try: + # Try using Docker to delete, as files might be owned by root + parent_dir = os.path.dirname(os.path.abspath(path)) + base_name = os.path.basename(path) + subprocess.run(['docker', 'run', '--rm', '-v', f'{parent_dir}:/tmp/parent', 'busybox', 'rm', '-rf', f'/tmp/parent/{base_name}'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except: + pass + +def recursive_chmod(path, mode): + if not os.path.exists(path): + return + try: + os.chmod(path, mode) + except: + pass + for root, dirs, files in os.walk(path): + for d in dirs: + try: + os.chmod(os.path.join(root, d), mode) + except: + pass + for f in files: + full_path = os.path.join(root, f) + if not os.path.islink(full_path): + try: + os.chmod(full_path, mode) + except: + pass + +# Global list to track active subprocesses for clean termination +active_processes = [] + +async def run_command(cmd, cwd, env=None, capture_output=False, dry_run=False, prefix=None, stdout=None, stderr=None): + if isinstance(cmd, list): + cmd_str = ' '.join(cmd) + cmd_args = cmd + is_shell = False + else: + cmd_str = cmd + cmd_args = cmd + is_shell = True + + # safe_print(f"Running command: {cmd_str} in {cwd}") # Removed redundant print + try: + # Determine stdout/stderr for subprocess creation + _stdout = stdout if stdout else asyncio.subprocess.PIPE + _stderr = stderr if stderr else asyncio.subprocess.STDOUT # Original default + + if is_shell: + process = await asyncio.create_subprocess_shell( + cmd_args, + cwd=cwd, + env=env, + stdout=_stdout, + stderr=_stderr, + preexec_fn=os.setsid + ) + else: + process = await asyncio.create_subprocess_exec( + *cmd_args, + cwd=cwd, + env=env, + stdout=_stdout, + stderr=_stderr, + preexec_fn=os.setsid + ) + active_processes.append(process) + + output_lines = [] + if process.stdout: # Check if process.stdout exists before trying to read from it + while True: + line = await process.stdout.readline() + if not line: + break + try: + decoded_line = line.decode('utf-8').strip() + except: + decoded_line = line.decode('latin-1', errors='replace').strip() + + if capture_output: + output_lines.append(decoded_line) + if prefix: + logger.info(f"{prefix} {decoded_line}") + elif not capture_output: + logger.info(decoded_line) + + await process.wait() + if process in active_processes: + active_processes.remove(process) + if process.returncode != 0: + raise Exception(f"Command failed with exit code {process.returncode}: {cmd}") + return "\n".join(output_lines) + except Exception as e: + safe_print(f"Error running command: {e}") + raise e + +async def get_gcb_builds(project): + safe_print(f"\n--- Step 1: Checking GCB builds for {project} ---") + import shlex + import subprocess + import traceback + + cmd_us_central1_str = f"gcloud builds list --project=oss-fuzz --region=us-central1 --filter=\"tags='{project}' AND tags='fuzzing'\" --limit=3 --format=json --sort-by=\"~createTime\"" + 
safe_print(f"Running command: {cmd_us_central1_str} in {OSS_FUZZ_DIR}") + try: + cmd_args = shlex.split(cmd_us_central1_str) + output = subprocess.check_output(cmd_args, cwd=OSS_FUZZ_DIR, text=True) + except Exception as e: + safe_print(f"Error running subprocess: {e}") + output = "" + + builds = json.loads(output) if output else [] + + if not builds: + safe_print("No builds found in us-central1, trying global...") + cmd_global_str = f"gcloud builds list --project=oss-fuzz --filter=\"tags='{project}' AND tags='fuzzing'\" --limit=3 --format=json --sort-by=\"~createTime\"" + safe_print(f"Running command: {cmd_global_str} in {OSS_FUZZ_DIR}") + try: + cmd_args = shlex.split(cmd_global_str) + output = subprocess.check_output(cmd_args, cwd=OSS_FUZZ_DIR, text=True) + except Exception as e: + safe_print(f"Error running subprocess: {e}") + output = "" + builds = json.loads(output) if output else [] + + if output: + try: + builds = json.loads(output) + if not builds: + return [] + + # Check if all 3 are successful + all_success = all(b.get('status') == 'SUCCESS' for b in builds) + if all_success and len(builds) >= 3: + safe_print("GCB builds are healthy (3/3 SUCCESS).") + else: + safe_print(f"GCB builds are not healthy. Statuses: {[b.get('status') for b in builds]}") + return builds # Return the builds list + except json.JSONDecodeError: + safe_print("Error decoding GCB builds output.") + return [] + return [] + +async def run_reproduction(project, local_build_path=None, os_version='legacy', engine=None, sanitizer=None, dry_run=False, use_batch=False, gcs_bucket=None, limit=None): + safe_print(f"\n--- Running reproduction for {project} (OS: {os_version}, Engine: {engine}, Sanitizer: {sanitizer}, Local Build: {local_build_path}, Batch: {use_batch}) ---") + + env = os.environ.copy() + env['PYTHONPATH'] = CASP_PYTHONPATH + + cmd = f"python3.11 -m casp.main reproduce project --project-name {project}" + if os_version: + cmd += f" --os-version {os_version}" + if local_build_path: + cmd += f" --local-build-path {local_build_path}" + if engine: + cmd += f" --engine {engine}" + if sanitizer: + cmd += f" --sanitizer {sanitizer}" + if use_batch: + cmd += " --use-batch" + if limit: + cmd += f" --limit {limit}" + + cmd += " -n 20" + + if gcs_bucket: + cmd += f" --gcs-bucket {gcs_bucket}" + + prefix_engine = engine if engine else "all" + prefix_sanitizer = sanitizer if sanitizer else "all" + + # Add random jitter to avoid burst requests to Datastore/GCS + jitter = random.uniform(0, 10) + safe_print(f"[{os_version}-{prefix_engine}-{prefix_sanitizer}] Waiting {jitter:.2f}s jitter before starting...") + await asyncio.sleep(jitter) + + safe_print(f"[{os_version}-{prefix_engine}-{prefix_sanitizer}] Running command: {cmd}") + output = await run_command(cmd, CLUSTERFUZZ_DIR, env=env, capture_output=True, dry_run=False, prefix=f"[{os_version}-{prefix_engine}-{prefix_sanitizer}]") + + if not output: + return 0, 0, [] + + success_match = re.search(r"Success: (\d+)", output) + failed_match = re.search(r"Failed:\s+(\d+)", output) + + success_count = int(success_match.group(1)) if success_match else 0 + failed_count = int(failed_match.group(1)) if failed_match else 0 + + failures = [] + if failed_count > 0: + log_dir_match = re.search(r"Detailed logs are available in: (\S+)", output) + if log_dir_match: + log_dir = log_dir_match.group(1) + if os.path.exists(log_dir): + for f in os.listdir(log_dir): + if f.endswith('.log') and f.startswith('tc-'): + log_path = os.path.join(log_dir, f) + with open(log_path, 'r') as log_file: + 
content = log_file.read() + is_success = "Success" in content or "Crash is reproducible" in content or "The testcase reliably reproduces" in content + if not is_success: + tc_id = f.replace('tc-', '').replace('.log', '') + failures.append({ + 'tc_id': tc_id, + 'os_version': os_version, + 'engine': engine, + 'sanitizer': sanitizer, + 'log_path': log_path + }) + + return success_count, failed_count, failures + +def get_project_config(project): + project_yaml = os.path.join(OSS_FUZZ_DIR, 'projects', project, 'project.yaml') + engines = ['libfuzzer'] + sanitizers = ['address'] + if os.path.exists(project_yaml): + with open(project_yaml, 'r') as f: + content = f.read() + match_eng = re.search(r'fuzzing_engines:\n((?:\s+-\s+\w+\n)+)', content) + if match_eng: + engines = [line.strip().replace('- ', '') for line in match_eng.group(1).splitlines()] + match_san = re.search(r'sanitizers:\n((?:\s+-\s+\w+\n)+)', content) + if match_san: + sanitizers = [line.strip().replace('- ', '') for line in match_san.group(1).splitlines()] + return engines, sanitizers + +def get_project_contacts(project): + project_yaml = os.path.join(OSS_FUZZ_DIR, 'projects', project, 'project.yaml') + contacts = ['@DavidKorczynski'] + if os.path.exists(project_yaml): + with open(project_yaml, 'r') as f: + content = f.read() + pc_match = re.search(r'primary_contact:\s+"?([^"\n]+)"?', content) + if pc_match: + contacts.append(pc_match.group(1)) + cc_match = re.search(r'auto_ccs:\n((?:\s+-\s+"?[^"\n]+"?\n)+)', content) + if cc_match: + for line in cc_match.group(1).splitlines(): + email = line.strip().replace('- ', '').replace('"', '') + if email not in contacts: + contacts.append(email) + return contacts + +async def build_local_combo(project, combo_temp_dir, engine, sanitizer, os_version, rebuild, build_project_name, oss_fuzz_dir, dry_run, use_batch, gcs_bucket, pull=False, cpu_limit=None, mem_limit=None, limit=None): + if not oss_fuzz_dir: + oss_fuzz_dir = OSS_FUZZ_DIR + if not build_project_name: + build_project_name = project + + import random + import string + random_suffix = ''.join(random.choices(string.ascii_lowercase + string.digits, k=6)) + combo_dir_name = f"{os_version}-{engine}-{sanitizer}-{random_suffix}" + + safe_print(f"[{os_version}-{engine}-{sanitizer}] Using combo_dir_name: {combo_dir_name}") + + # New structure: oss-migration//builds/-- + project_migration_dir = os.path.join(BASE_MIGRATION_DIR, project) + builds_dir = os.path.join(project_migration_dir, 'builds') + combo_temp_dir = os.path.join(builds_dir, combo_dir_name) + + os.makedirs(combo_temp_dir, exist_ok=True) + recursive_chmod(combo_temp_dir, 0o755) # Ensure dir is accessible + + # If rebuild is True, we want a clean build, so remove existing dir if it exists + if rebuild and os.path.exists(combo_temp_dir): + safe_print(f"[{os_version}-{engine}-{sanitizer}] Removing existing build dir for clean build: {combo_temp_dir}") + safe_rmtree(combo_temp_dir) + os.makedirs(combo_temp_dir, exist_ok=True) + recursive_chmod(combo_temp_dir, 0o755) + + # The build output will stay within this directory + combo_out_dir = os.path.join(combo_temp_dir, 'build', 'out', build_project_name) + + repro_results = (0, 0, []) # Default to 0 successes/failures, no failures list + build_failures = [] + + if not rebuild and os.path.exists(combo_out_dir) and os.listdir(combo_out_dir): + safe_print(f"Skipping build for {engine}-{sanitizer} ({os_version}) as directory exists and is not empty.") + # Still need to run reproduction if skipped build? Yes, usually. 
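+        # Build is skipped: reuse whatever is already in combo_out_dir and only rerun
+        # reproduction against it (the GCS bucket is forwarded only in batch mode).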
+ repro_results = await run_reproduction(project, local_build_path=combo_out_dir, os_version=os_version, engine=engine, sanitizer=sanitizer, dry_run=False, use_batch=use_batch, gcs_bucket=gcs_bucket if use_batch else None, limit=limit) + return combo_out_dir, combo_temp_dir, repro_results, build_failures + + if not os.path.exists(combo_temp_dir): + os.makedirs(combo_temp_dir, exist_ok=True) + recursive_chmod(combo_temp_dir, 0o755) # Ensure combo dir is accessible + safe_print(f"Created persistent isolated build dir: {combo_temp_dir}") + + # Set up OSS-Fuzz environment in temp dir + # We need to create infra dir and symlink contents except helper.py + infra_dir = os.path.join(combo_temp_dir, 'infra') + os.makedirs(infra_dir, exist_ok=True) + original_infra_dir = os.path.join(oss_fuzz_dir, 'infra') + for item in os.listdir(original_infra_dir): + if item == 'helper.py': + continue + s = os.path.join(original_infra_dir, item) + d = os.path.join(infra_dir, item) + if os.path.isdir(s): + os.symlink(s, d, target_is_directory=True) + else: + os.symlink(s, d) + + # Copy modified helper.py + shutil.copy2(os.path.join(os.path.dirname(__file__), 'helper_modified.py'), os.path.join(infra_dir, 'helper.py')) + os.chmod(os.path.join(infra_dir, 'helper.py'), 0o755) + + os.symlink(os.path.join(oss_fuzz_dir, 'base-images'), os.path.join(combo_temp_dir, 'base-images')) + os.symlink(os.path.join(oss_fuzz_dir, 'build.py'), os.path.join(combo_temp_dir, 'build.py')) + else: + safe_print(f"Using existing isolated build dir: {combo_temp_dir}") + + # Ensure project directory exists and is correctly linked/copied + project_temp_dir = os.path.join(combo_temp_dir, 'projects', build_project_name) + if os.path.exists(project_temp_dir) or os.path.islink(project_temp_dir): + if os.path.islink(project_temp_dir): + os.remove(project_temp_dir) + elif os.path.isdir(project_temp_dir): + safe_rmtree(project_temp_dir) + else: + os.remove(project_temp_dir) + + os.makedirs(os.path.dirname(project_temp_dir), exist_ok=True) + if os_version == 'ubuntu-24-04': + # For 24.04, copy from the modified OSS-Fuzz dir (which is oss_fuzz_dir here) + shutil.copytree(os.path.join(oss_fuzz_dir, 'projects', build_project_name), project_temp_dir) + else: + # For Legacy, just symlink from original OSS_FUZZ_DIR + os.symlink(os.path.join(OSS_FUZZ_DIR, 'projects', build_project_name), project_temp_dir) + + # Ensure essential symlinks exist even if directory existed + for link_name in ['base-images', 'build.py']: + link_path = os.path.join(combo_temp_dir, link_name) + if os.path.exists(link_path) or os.path.islink(link_path): + try: + if os.path.islink(link_path): + os.remove(link_path) + elif os.path.isdir(link_path): + safe_rmtree(link_path) + else: + os.remove(link_path) + except: + pass + os.symlink(os.path.join(oss_fuzz_dir, link_name), link_path) + safe_print(f"[{os_version}-{engine}-{sanitizer}] Created symlink: {link_path} -> {os.path.join(oss_fuzz_dir, link_name)}") + + # Handle infra separately to keep modified helper.py + infra_link_path = os.path.join(combo_temp_dir, 'infra') + if not os.path.exists(infra_link_path): + os.makedirs(infra_link_path, exist_ok=True) + original_infra_dir = os.path.join(oss_fuzz_dir, 'infra') + for item in os.listdir(original_infra_dir): + if item == 'helper.py': + continue + s = os.path.join(original_infra_dir, item) + d = os.path.join(infra_link_path, item) + if os.path.islink(d) or os.path.exists(d): + continue + if os.path.isdir(s): + os.symlink(s, d, target_is_directory=True) + else: + os.symlink(s, d) + # 
Copy modified helper.py + shutil.copy2(os.path.join(os.path.dirname(__file__), 'helper_modified.py'), os.path.join(infra_link_path, 'helper.py')) + os.chmod(os.path.join(infra_link_path, 'helper.py'), 0o755) + + # Fallback check in case helper.py still used real OSS_FUZZ_DIR (unlikely now but safe) + real_oss_fuzz_out = os.path.join(oss_fuzz_dir, 'build', 'out', build_project_name) + if os.path.exists(real_oss_fuzz_out) and os.listdir(real_oss_fuzz_out): + safe_print(f"[{os_version}-{engine}-{sanitizer}] Found output in real OSS_FUZZ_DIR, moving to persistent dir.") + if os.path.exists(combo_out_dir): + if not dry_run: + safe_rmtree(combo_out_dir) + if not dry_run: + os.makedirs(os.path.dirname(combo_out_dir), exist_ok=True) + shutil.move(real_oss_fuzz_out, combo_out_dir) + os.makedirs(real_oss_fuzz_out, exist_ok=True) # Recreate empty dir to avoid issues + else: + safe_print(f"Dry run: Would move {real_oss_fuzz_out} to {combo_out_dir}") + # Run reproduction immediately after move + repro_results = await run_reproduction(project, local_build_path=combo_out_dir, os_version=os_version, engine=engine, sanitizer=sanitizer, dry_run=False, use_batch=use_batch, gcs_bucket=gcs_bucket, limit=limit) + return combo_out_dir, combo_temp_dir, repro_results, build_failures + + # Run build using helper.py within the isolated environment + # We need to run this from within combo_temp_dir to use the local infra and projects + build_log_path = os.path.join(combo_temp_dir, 'build.log') + try: + # 1. Build Image + build_image_cmd = f"python3 infra/helper.py build_image --pull {build_project_name}" + safe_print(f"[{os_version}-{engine}-{sanitizer}] Running build command: {build_image_cmd} in {combo_temp_dir} (Log: {build_log_path})") + # Log build output to file + with open(build_log_path, 'a') as log_file: + await run_command(build_image_cmd, combo_temp_dir, capture_output=False, dry_run=False, prefix=f"[{os_version}-{engine}-{sanitizer}]", stdout=log_file, stderr=log_file) + + cmd = f"python3 infra/helper.py build_fuzzers --engine {engine} --sanitizer {sanitizer} {build_project_name}" + if cpu_limit: + cmd += f" --docker-arg=\"--cpus={cpu_limit}\"" + if mem_limit: + cmd += f" --docker-arg=\"--memory={mem_limit}g\"" + + safe_print(f"[{os_version}-{engine}-{sanitizer}] Running build command: {cmd} in {combo_temp_dir} (Log: {build_log_path})") + with open(build_log_path, 'a') as log_file: + await run_command(cmd, combo_temp_dir, capture_output=False, dry_run=False, prefix=f"[{os_version}-{engine}-{sanitizer}]", stdout=log_file, stderr=log_file) + except Exception as e: + safe_print(f"[{os_version}-{engine}-{sanitizer}] Build failed: {e}") + build_failures.append({ + 'os_version': os_version, + 'engine': engine, + 'sanitizer': sanitizer, + 'error': str(e), + 'log_path': build_log_path + }) + return combo_out_dir, combo_temp_dir, (0, 0, []), build_failures + + # After build, ensure permissions are correct for Docker access + # First, fix ownership of the output directory (Docker creates files as root) + # Use Docker itself to fix permissions to avoid sudo password prompt on host + try: + uid = os.getuid() + gid = os.getgid() + # We need to mount the parent directory to handle the directory itself if needed, + # but mounting the directory directly is simpler for its contents. + # Since combo_out_dir is what we want to fix: + cmd = f"docker run --rm -v {combo_out_dir}:/out busybox chown -R {uid}:{gid} /out" + # Run this command. We don't need to be in combo_temp_dir for this. 
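+        # Assumes the local Docker daemon has enough privileges to chown the mounted
+        # /out directory; if the busybox container fails, the exception below is only
+        # logged as a warning and the build output simply stays root-owned.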
+ await run_command(cmd, os.getcwd(), capture_output=False, prefix=f"[{os_version}-{engine}-{sanitizer}]") + except Exception as e: + safe_print(f"[{os_version}-{engine}-{sanitizer}] Warning: Failed to fix ownership with Docker: {e}") + + recursive_chmod(combo_temp_dir, 0o755) + + # After build, check if output exists + if not os.path.exists(combo_out_dir) or not os.listdir(combo_out_dir): + safe_print(f"[{os_version}-{engine}-{sanitizer}] Build failed or no output generated in {combo_out_dir}") + build_failures.append({ + 'os_version': os_version, + 'engine': engine, + 'sanitizer': sanitizer, + 'error': 'No output generated' + }) + return combo_out_dir, combo_temp_dir, (0, 0, []), build_failures + + # Run reproduction + repro_results = await run_reproduction(project, local_build_path=combo_out_dir, os_version=os_version, engine=engine, sanitizer=sanitizer, dry_run=False, use_batch=use_batch, gcs_bucket=gcs_bucket, limit=limit) + + return combo_out_dir, combo_temp_dir, repro_results, build_failures + +async def build_local(project, engines=None, sanitizers=None, dry_run=False, rebuild=False, os_version='legacy', build_project_name=None, oss_fuzz_dir=None, use_batch=False, gcs_bucket=None, pull=False, cpu_limit=None, mem_limit=None, limit=None): + if build_project_name is None: + build_project_name = project + if oss_fuzz_dir is None: + oss_fuzz_dir = OSS_FUZZ_DIR + + safe_print(f"\n--- Building local fuzzers for {build_project_name} ({os_version}) ---") + if not engines: + engines = ['libfuzzer'] + if not sanitizers: + sanitizers = ['address'] + + tasks = [] + for engine in engines: + for sanitizer in sanitizers: + tasks.append(build_local_combo(project, None, engine, sanitizer, os_version, rebuild, build_project_name, oss_fuzz_dir, dry_run, use_batch, gcs_bucket, pull=pull, cpu_limit=cpu_limit, mem_limit=mem_limit, limit=limit)) + + results = await asyncio.gather(*tasks, return_exceptions=True) + + build_paths = [] + temp_dirs = [] + total_success = 0 + total_failed = 0 + all_failures = [] + all_build_failures = [] + + for res in results: + if isinstance(res, Exception): + safe_print(f"Build/Reproduction failed with exception: {res}") + # We don't have engine/sanitizer here easily, but we can log the exception + elif res: + out_dir, temp_dir, repro_results, build_failures = res + success, failed, failures = repro_results + build_paths.append(out_dir) + temp_dirs.append(temp_dir) + total_success += success + total_failed += failed + all_failures.extend(failures) + all_build_failures.extend(build_failures) + return build_paths, temp_dirs, total_success, total_failed, all_failures, all_build_failures + +async def modify_files_for_2404(project, oss_fuzz_dir, dry_run=False): + safe_print(f"\n--- Modifying files for Ubuntu 24.04 in {oss_fuzz_dir} ---") + project_yaml = os.path.join(oss_fuzz_dir, 'projects', project, 'project.yaml') + dockerfile = os.path.join(oss_fuzz_dir, 'projects', project, 'Dockerfile') + + # Backup original files before modification + if not dry_run: + if os.path.exists(project_yaml): + shutil.copy(project_yaml, project_yaml + '.bak') + if os.path.exists(dockerfile): + shutil.copy(dockerfile, dockerfile + '.bak') + + with open(project_yaml, 'a') as f: + f.write('\nbase_os_version: "ubuntu-24-04"\n') + + with open(dockerfile, 'r') as f: + content = f.read() + + # Robust replacement using regex to handle base images like base-builder-go + # Matches 'FROM gcr.io/oss-fuzz-base/base-builder' optionally followed by '-lang' and optionally a tag + import re + new_content = re.sub( 
+ r'FROM\s+(gcr\.io/oss-fuzz-base/base-builder(?:-[a-z]+)?)(?::\w+)?', + r'FROM \1:ubuntu-24-04', + content + ) + + with open(dockerfile, 'w') as f: + f.write(new_content) + else: + safe_print(f"Dry run: Would modify {project_yaml} and {dockerfile}") + +async def run_full_suite(project, engines, sanitizers, os_version, rebuild, build_project_name=None, oss_fuzz_dir=None, use_batch=False, gcs_bucket=None, cpu_limit=None, mem_limit=None, limit=None): + if build_project_name is None: + build_project_name = project + + safe_print(f"\n--- Running Full Suite for {project} on {os_version} ---") + build_paths, temp_dirs, total_success, total_failed, failures, build_failures = await build_local(project, engines=engines, sanitizers=sanitizers, dry_run=False, rebuild=rebuild, os_version=os_version, build_project_name=build_project_name, oss_fuzz_dir=oss_fuzz_dir, use_batch=use_batch, gcs_bucket=gcs_bucket, cpu_limit=cpu_limit, mem_limit=mem_limit, limit=limit) + + return total_success, total_failed, temp_dirs, failures, build_failures + + +async def main_async(): + parser = argparse.ArgumentParser(description='Verify OSS-Fuzz project builds and reproduction.') + parser.add_argument('project', help='OSS-Fuzz project name') + parser.add_argument('--rebuild', action='store_true', help='Force rebuild even if build directory exists') + parser.add_argument('--use-batch', action='store_true', help='Use Google Cloud Batch for reproduction') + parser.add_argument('--gcs-bucket', help='GCS bucket for temporary storage (required for --use-batch)') + parser.add_argument('--engine', help='Specific engine to run (e.g., libfuzzer)') + parser.add_argument('--sanitizer', help='Specific sanitizer to run (e.g., address)') + parser.add_argument('--limit', type=int, default=None, help='Limit the number of testcases to reproduce') + args = parser.parse_args() + + # Setup persistent log file for results only + project_migration_dir = os.path.join(BASE_MIGRATION_DIR, args.project) + results_dir = os.path.join(project_migration_dir, 'results') + os.makedirs(results_dir, exist_ok=True) + + log_filepath = os.path.join(results_dir, 'summary.log') + + safe_print(f"Starting main... Full output on console, results will be saved to {log_filepath}") + safe_print(f"Args: {args}") + + results = {} + temp_oss_fuzz_dir = None + all_temp_dirs = [] # Initialize all_temp_dirs here + + try: + engines, sanitizers = get_project_config(args.project) + if args.engine: + engines = [args.engine] + if args.sanitizer: + sanitizers = [args.sanitizer] + + safe_print(f"Found engines: {engines}") + safe_print(f"Found sanitizers: {sanitizers}") + + safe_print("Starting parallel execution...") + + # Calculate resources per thread + # Total resources available + TOTAL_CPUS = 52 + TOTAL_RAM_GB = 88 + + num_engines = len(engines) + num_sanitizers = len(sanitizers) + # We run legacy and 24.04 in parallel, each with engines*sanitizers builds + total_build_threads = 2 * num_engines * num_sanitizers + + if total_build_threads > 0: + cpu_per_thread = max(1, TOTAL_CPUS // total_build_threads) + mem_per_thread = max(1, TOTAL_RAM_GB // total_build_threads) + safe_print(f"Resource distribution: {total_build_threads} threads, {cpu_per_thread} CPUs/thread, {mem_per_thread}GB RAM/thread") + else: + cpu_per_thread = None + mem_per_thread = None + + # 1. Check GCB builds first (Pre-requisite) + gcb_builds = await get_gcb_builds(args.project) + gcb_success = True + if not gcb_builds: + safe_print("No GCB builds found. 
Cannot proceed.") + gcb_success = False + else: + for b in gcb_builds: + if b.get('status') != 'SUCCESS': + safe_print(f"GCB build {b.get('id')} is not SUCCESS (status: {b.get('status')}).") + gcb_success = False + + + # Check health based on returned builds + is_healthy = False + if gcb_builds and len(gcb_builds) >= 3: + is_healthy = all(b.get('status') == 'SUCCESS' for b in gcb_builds) + + if not is_healthy: + safe_print("\n❌ GCB builds are not healthy. Skipping migration.") + # Still generate summary but with failure + with open(log_filepath, 'w') as results_log: + def dual_print(message): + logger.info(message) + results_log.write(message + '\n') + dual_print("\n========================================") + dual_print("SUMMARY REPORT") + dual_print("========================================") + dual_print(f"Project: {args.project}") + if gcb_builds: + dual_print("-" * 40) + dual_print(f"{'Build ID':<36} | {'Status':<10} | {'Link'}") + dual_print("-" * 40) + for b in gcb_builds: + build_id = b.get('id', 'N/A') + status = b.get('status', 'N/A') + link = f"https://console.cloud.google.com/cloud-build/builds/{build_id}?project=oss-fuzz" + dual_print(f"{build_id:<36} | {status:<10} | {link}") + else: + dual_print("GCB Builds (fuzzing): None") + dual_print("-" * 40) + dual_print("\n❌ Failure: GCB builds are not healthy.") + dual_print("Skipping PR preparation.") + dual_print("========================================") + dual_print(f"\nResults saved to: {log_filepath}") + return + + # 2. Proceed with other tasks if GCB is healthy + task_remote = asyncio.create_task(run_reproduction(args.project, local_build_path=None, os_version='legacy', dry_run=False, use_batch=args.use_batch, gcs_bucket=args.gcs_bucket, limit=args.limit)) + # Legacy builds now handle their own isolation with run_id + task_legacy = asyncio.create_task(run_full_suite(args.project, engines, sanitizers, 'legacy', args.rebuild, build_project_name=args.project, oss_fuzz_dir=OSS_FUZZ_DIR, use_batch=args.use_batch, gcs_bucket=args.gcs_bucket, cpu_limit=cpu_per_thread, mem_limit=mem_per_thread, limit=args.limit)) + # 24.04 builds now handle their own isolation and modification with run_id + # For Ubuntu 24.04, we need a modified OSS-Fuzz dir, but we can reuse the main one for now if we are careful, + # or create a temporary one. Given we want isolated builds, we will create a temporary OSS-Fuzz dir for 24.04 + # to avoid modifying the main one's base images if possible, though build_local_combo handles Dockerfile changes. + # To be safe and allow parallel 24.04 builds, we'll use a temp OSS-Fuzz dir. 
+ + # Create a temporary OSS-Fuzz directory for 24.04 modifications + temp_oss_fuzz_dir = tempfile.mkdtemp(prefix=f'oss-fuzz-2404-{args.project}-') + + # Symlink everything from OSS_FUZZ_DIR except projects + for item in os.listdir(OSS_FUZZ_DIR): + if item == 'projects': + continue + s = os.path.join(OSS_FUZZ_DIR, item) + d = os.path.join(temp_oss_fuzz_dir, item) + if os.path.isdir(s): + os.symlink(s, d, target_is_directory=True) + else: + os.symlink(s, d) + + # Create 'projects' directory and copy only the specific project + projects_dir = os.path.join(temp_oss_fuzz_dir, 'projects') + os.makedirs(projects_dir, exist_ok=True) + shutil.copytree(os.path.join(OSS_FUZZ_DIR, 'projects', args.project), os.path.join(projects_dir, args.project)) + + # Modify files for 24.04 in the temp dir + await modify_files_for_2404(args.project, oss_fuzz_dir=temp_oss_fuzz_dir, dry_run=False) + + task_2404 = asyncio.create_task(run_full_suite(args.project, engines, sanitizers, 'ubuntu-24-04', args.rebuild, build_project_name=args.project, oss_fuzz_dir=temp_oss_fuzz_dir, use_batch=args.use_batch, gcs_bucket=args.gcs_bucket, cpu_limit=cpu_per_thread, mem_limit=mem_per_thread, limit=args.limit)) + + # Wait for all tasks + results_list = await asyncio.gather(task_remote, task_legacy, task_2404, return_exceptions=True) + + # Give a moment for all subprocess output to flush + await asyncio.sleep(1) + + results = { + 'gcb_status': gcb_builds, + 'remote_legacy': results_list[0] if not isinstance(results_list[0], Exception) else (0, 0, []), + 'local_legacy': (results_list[1][0], results_list[1][1]) if not isinstance(results_list[1], Exception) else (0, 0), + 'local_2404': (results_list[2][0], results_list[2][1]) if not isinstance(results_list[2], Exception) else (0, 0) + } + + # Collect failures + all_failures = [] + all_build_failures = [] + if len(results_list) > 0 and results_list[0] and not isinstance(results_list[0], Exception) and len(results_list[0]) > 2: + all_failures.extend(results_list[0][2]) + if len(results_list) > 1 and results_list[1] and not isinstance(results_list[1], Exception) and len(results_list[1]) > 3: + all_failures.extend(results_list[1][3]) + all_build_failures.extend(results_list[1][4]) + if len(results_list) > 2 and results_list[2] and not isinstance(results_list[2], Exception) and len(results_list[2]) > 3: + all_failures.extend(results_list[2][3]) + all_build_failures.extend(results_list[2][4]) + + # Collect temp dirs for cleanup (only the temp oss-fuzz dir, not the builds) + all_temp_dirs = [] + if temp_oss_fuzz_dir: + all_temp_dirs.append(temp_oss_fuzz_dir) + + safe_print("\n--- Cleaning up temporary OSS-Fuzz directories ---") + for d in all_temp_dirs: + if os.path.exists(d): + safe_print(f"Removing {d}") + safe_rmtree(d) + + # Open results log file for writing summary and failures + with open(log_filepath, 'w') as results_log: + def dual_print(message): + logger.info(message) + results_log.write(message + '\n') + + if all_build_failures: + dual_print("\nFAILED BUILDS") + dual_print("-" * 80) + dual_print(f"{'OS':<14} | {'Engine':<10} | {'Sanitizer':<10} | {'Error'}") + dual_print("-" * 80) + for f in all_build_failures: + error_msg = f.get('error', 'Unknown error') + log_path = f.get('log_path', 'N/A') + dual_print(f"{f['os_version']:<14} | {f['engine']:<10} | {f['sanitizer']:<10} | {error_msg}") + if log_path != 'N/A': + dual_print(f"{'':<14} | {'':<10} | {'':<10} | Log: {log_path}") + dual_print("-" * 80) + + if all_failures: + dual_print("\nFAILED TEST CASES") + dual_print("-" * 80) + 
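+            # Each entry in all_failures was collected by run_reproduction() and carries
+            # tc_id, os_version, engine, sanitizer and the path to its per-testcase log.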
dual_print(f"{'TC ID':<16} | {'OS':<12} | {'Engine':<10} | {'Sanitizer':<10} | Log Path") + dual_print("-" * 80) + for f in all_failures: + engine = f['engine'] if f['engine'] else 'N/A' + sanitizer = f['sanitizer'] if f['sanitizer'] else 'N/A' + dual_print(f"{f['tc_id']:<16} | {f['os_version']:<12} | {engine:<10} | {sanitizer:<10} | {f['log_path']}") + dual_print("-" * 80) + + dual_print("\n" + "="*40) + dual_print("SUMMARY REPORT") + dual_print("="*40) + dual_print(f"Project: {args.project}") + gcb_status = results.get('gcb_status', []) + if gcb_status and isinstance(gcb_status[0], dict): + gcb_status_str = ', '.join([b.get('status', 'UNKNOWN') for b in gcb_status]) + else: + gcb_status_str = ', '.join(gcb_status) + dual_print(f"GCB Builds (fuzzing): {gcb_status_str}") + dual_print("-" * 40) + dual_print(f"{'Scenario':<25} | {'Success':<7} | {'Failed':<7}") + dual_print("-" * 40) + + def print_res_dual(name, res): + if isinstance(res, tuple) and len(res) >= 2: + dual_print(f"{name:<25} | {res[0]:<7} | {res[1]:<7}") + else: + dual_print(f"{name:<25} | Error | Error") + + print_res_dual('Remote (Legacy)', results.get('remote_legacy')) + print_res_dual('Local (Legacy)', results.get('local_legacy')) + print_res_dual('Local (Ubuntu 24.04)', results.get('local_2404')) + dual_print("-" * 40) + + success_remote = results['remote_legacy'][0] if isinstance(results['remote_legacy'], tuple) else 0 + success_local_legacy = results['local_legacy'][0] if isinstance(results['local_legacy'], tuple) else 0 + success_local_2404 = results['local_2404'][0] if isinstance(results['local_2404'], tuple) else 0 + + # Apply new rules: + # 1. Legacy Local >= 70% of Legacy Remote + # 2. Ubuntu 24.04 >= 70% of Legacy Local + + # Calculate 70% of legacy remote + threshold_legacy = success_remote * 0.7 + legacy_match = (success_local_legacy >= threshold_legacy) and (success_remote > 0) + + # Calculate 70% of legacy local + threshold_2404 = success_local_legacy * 0.7 + ubuntu_2404_acceptable = (success_local_2404 >= threshold_2404) + + if legacy_match and ubuntu_2404_acceptable and success_local_legacy > 0: + dual_print("\n✅ Success: Results meet criteria for PR.") + if success_local_2404 < success_local_legacy: + dual_print(f"⚠️ Warning: Ubuntu 24.04 had fewer successes ({success_local_2404}) than Legacy ({success_local_legacy}), but is within 30% tolerance.") + if success_local_legacy < success_remote: + dual_print(f"⚠️ Warning: Legacy Local had fewer successes ({success_local_legacy}) than Remote ({success_remote}), but is within 30% tolerance.") + if success_local_legacy > success_remote: + dual_print(f"ℹ️ Note: Legacy Local had more successes ({success_local_legacy}) than Remote ({success_remote}).") + + dual_print("PR preparation skipped (use separate script to create branch).") + else: + dual_print("\n❌ Failure: Results do not meet criteria for PR.") + if not legacy_match: + if success_remote == 0: + dual_print(f" - Legacy Remote has 0 successes.") + elif success_local_legacy < threshold_legacy: + dual_print(f" - Legacy Local ({success_local_legacy}) is below 70% of Remote ({success_remote}). Threshold: {threshold_legacy:.1f}") + if not ubuntu_2404_acceptable: + dual_print(f" - Ubuntu 24.04 ({success_local_2404}) is below 70% of Legacy ({success_local_legacy}). 
Threshold: {threshold_2404:.1f}") + if success_local_legacy == 0: + dual_print(" - Legacy Local has 0 successes.") + dual_print("Skipping PR preparation.") + dual_print("="*40) + dual_print(f"\nResults saved to: {log_filepath}") + + except Exception as e: + safe_print(f"An error occurred: {e}") + traceback.print_exc() + finally: + # Fallback cleanup in case of exceptions before normal cleanup + if 'all_temp_dirs' in locals(): + for d in all_temp_dirs: + if os.path.exists(d): + shutil.rmtree(d) + +if __name__ == "__main__": + try: + asyncio.run(main_async()) + finally: + restore_terminal()