Fix benchmark status reporting to accurately reflect trial completion #847

Merged: 4 commits, Mar 13, 2025

report/common.py: 52 additions & 1 deletion
@@ -269,7 +269,10 @@ def list_benchmark_ids(self) -> List[str]:
  def match_benchmark(self, benchmark_id: str, results: list[evaluator.Result],
                      targets: list[str]) -> Benchmark:
    """Returns a benchmark class based on |benchmark_id|."""
-   status = 'Done' if results and all(results) else 'Running'
+   expected_trials = self._get_expected_trials_count(benchmark_id)
+   status = 'Done' if (results and len(results) == expected_trials and
+                       all(results)) else 'Running'

    filtered_results = [(i, stat) for i, stat in enumerate(results) if stat]

    if filtered_results:
@@ -279,6 +282,54 @@ def match_benchmark(self, benchmark_id: str, results: list[evaluator.Result],

    return self._create_benchmark(benchmark_id, status, result)

  def _get_expected_trials_count(self, benchmark_id: str) -> int:
    """Returns the expected number of trials for a benchmark."""
    env_trial_count = os.environ.get('BENCHMARK_TRIAL_COUNT')
    if env_trial_count:
      try:
        return int(env_trial_count)
      except ValueError:
        pass

    # Check fuzz_targets directory for new experiments
    fuzz_targets_dir = os.path.join(self._results_dir, benchmark_id,
                                    'fuzz_targets')
    if FileSystem(fuzz_targets_dir).exists():
      # For new experiments, use the max trial ID as the expected count
      # This handles the case where trials come out of order
      max_trial_id = 0
      trial_ids = set()

      for filename in FileSystem(fuzz_targets_dir).listdir():
        if os.path.splitext(filename)[1] in TARGET_EXTS:
          # Extract trial ID from filenames like "01.fuzz_target"
          trial_id = os.path.splitext(filename)[0]
          trial_ids.add(trial_id)
          try:
            trial_num = int(trial_id)
            max_trial_id = max(max_trial_id, trial_num)
          except ValueError:
            pass

      if max_trial_id > 0:
        return max_trial_id

      # Fallback: if we couldn't parse trial IDs as integers, use the count
      if trial_ids:
        return len(trial_ids)

    # Check raw_targets directory for older experiments
    raw_targets_dir = os.path.join(self._results_dir, benchmark_id,
                                   'raw_targets')
    if FileSystem(raw_targets_dir).exists():
      targets = [
          f for f in FileSystem(raw_targets_dir).listdir()
          if os.path.splitext(f)[1] in TARGET_EXTS
      ]
      return len(targets)

    return 1

  def get_final_target_code(self, benchmark: str, sample: str) -> str:
    """Gets the targets of benchmark |benchmark| with sample ID |sample|."""
    targets_dir = os.path.join(self._results_dir, benchmark, 'fixed_targets')
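
For context, here is a minimal, self-contained sketch (not repository code) of the two pieces this diff adds: deriving the expected trial count from fuzz-target filenames such as "01.fuzz_target", and only reporting 'Done' once that many results have arrived. The helper names and the use of plain truthy objects in place of evaluator.Result are illustrative assumptions.

import os

def expected_trials_from_filenames(filenames: list[str]) -> int:
  # "05.fuzz_target" -> 5; taking the max trial ID copes with trials that
  # finish (and land on disk) out of order.
  trial_ids = []
  for name in filenames:
    stem = os.path.splitext(name)[0]
    try:
      trial_ids.append(int(stem))
    except ValueError:
      pass
  return max(trial_ids) if trial_ids else 1

def benchmark_status(results: list, expected_trials: int) -> str:
  # Report 'Done' only when every expected trial has a truthy result.
  if results and len(results) == expected_trials and all(results):
    return 'Done'
  return 'Running'

expected = expected_trials_from_filenames(
    ['01.fuzz_target', '02.fuzz_target', '05.fuzz_target'])  # -> 5
# Before this change, 3 finished trials were already reported as 'Done';
# with the expected-trial check the benchmark stays 'Running' until all land.
assert benchmark_status([object()] * 3, expected) == 'Running'
assert benchmark_status([object()] * 5, expected) == 'Done'
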
run_all_experiments.py: 3 additions & 0 deletions
@@ -535,6 +535,9 @@ def main():
  _setup_logging(args.log_level, is_cloud=args.cloud_experiment_name != '')
  logger.info('Starting experiments on PR branch')

+ # Store the trial count in an environment variable
+ os.environ['BENCHMARK_TRIAL_COUNT'] = str(args.num_samples)

  # Capture time at start
  start = time.time()
  add_to_json_report(args.work_dir, 'start_time',
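
To show how the two files connect, the following standalone sketch (again illustrative, not repository code) mimics the hand-off: the runner publishes BENCHMARK_TRIAL_COUNT from args.num_samples, and the report side reads it back before falling back to inspecting result directories. expected_trials_from_env is a hypothetical helper mirroring only the first branch of _get_expected_trials_count.

import os

# Runner side (run_all_experiments.py): publish the trial count; 5 stands in
# for args.num_samples.
os.environ['BENCHMARK_TRIAL_COUNT'] = str(5)

# Report side: read it back, tolerating a missing or malformed value.
def expected_trials_from_env(default: int = 1) -> int:
  raw = os.environ.get('BENCHMARK_TRIAL_COUNT')
  if raw:
    try:
      return int(raw)
    except ValueError:
      pass  # Malformed value: fall through to the default.
  return default

assert expected_trials_from_env() == 5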