openevolve/prompt/sampler.py (6 changes: 5 additions & 1 deletion)
@@ -186,12 +186,16 @@ def _identify_improvement_areas(
             # Only compare numeric metrics
             if not isinstance(value, (int, float)) or isinstance(value, bool):
                 continue
 
+            # if the metric name ends with "(-)", it is a metric we want to minimize
+            if metric.endswith("(-)"):
+                value = -value
+
             improved = True
             regressed = True
 
             for attempt in recent_attempts:
-                attempt_value = attempt["metrics"].get(metric, 0)
+                attempt_value = -attempt["metrics"].get(metric, 0) if metric.endswith("(-)") else attempt["metrics"].get(metric, 0)
                 # Only compare if both values are numeric
                 if isinstance(value, (int, float)) and isinstance(attempt_value, (int, float)):
                     if attempt_value <= value:
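For readers skimming the hunk, here is a minimal standalone sketch of the convention this change introduces. The helper name normalize_metric is hypothetical and not part of the PR; the point is that negating a metric whose name ends in "(-)" lets a single "larger is better" comparison handle both metric directions.

    # Hypothetical sketch of the "(-)" convention, not code from this PR.
    def normalize_metric(name: str, value: float) -> float:
        """Return a value where larger always means better.

        Metrics whose names end with "(-)" are minimized, so their
        values are negated before any comparison.
        """
        return -value if name.endswith("(-)") else value

    # A minimized metric that drops from 0.5 to 0.3 now registers as better.
    assert normalize_metric("loss(-)", 0.3) > normalize_metric("loss(-)", 0.5)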
tests/test_prompt_sampler.py (55 changes: 55 additions & 0 deletions)
@@ -48,6 +48,61 @@ def test_build_prompt(self):
         self.assertIn("def test(): pass", prompt["user"])
         self.assertIn("score: 0.5", prompt["user"])
 
+    def test_metric_minimization_feature(self):
+        """Test that metrics ending with '(-)' are handled correctly for minimization"""
+        current_program = "def test(): pass"
+        parent_program = "def test(): pass"
+
+        # Test with both regular and minimization metrics
+        program_metrics = {
+            "improvement": 0.3,
+            "improvement(-)": 0.1,
+            "mixed": 0.3,
+            "mixed(-)": 0.3,
+            "regression": 0.1,
+            "regression(-)": 0.5,
+        }
+
+        # Create previous programs with different metric values to test comparison logic
+        previous_programs = [
+            {
+                "id": "prev1",
+                "code": "def prev1(): pass",
+                "metrics": {
+                    "improvement": 0.1,
+                    "improvement(-)": 0.2,
+                    "mixed": 0.1,
+                    "mixed(-)": 0.5,
+                    "regression": 0.5,
+                    "regression(-)": 0.3,
+                },
+            },
+            {
+                "id": "prev2",
+                "code": "def prev2(): pass",
+                "metrics": {
+                    "improvement": 0.2,
+                    "improvement(-)": 0.3,
+                    "mixed": 0.5,
+                    "mixed(-)": 0.1,
+                    "regression": 0.7,
+                    "regression(-)": 0.2,
+                },
+            },
+        ]
+
+        response = self.prompt_sampler._identify_improvement_areas(
+            current_program=current_program,
+            parent_program=parent_program,
+            metrics=program_metrics,
+            previous_programs=previous_programs,
+        )
+        expected_response = [
+            "- Metrics showing improvement: improvement, improvement(-). Consider continuing with similar changes.",
+            "- Metrics showing regression: regression, regression(-). Consider reverting or revising recent changes in these areas.",
+        ]
+        expected_response = "\n".join(expected_response)
+        self.assertEqual(response, expected_response)
 
 if __name__ == "__main__":
     unittest.main()
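As a sanity check on the fixture values above, here is a standalone sketch (not part of the test file) showing that the two "(-)" metrics classify exactly as the expected strings assert once their values are negated.

    # Standalone sanity check of the fixture above, not code from this PR.
    # "improvement(-)" fell from {0.2, 0.3} to 0.1, so after negation it beats
    # every previous attempt; "regression(-)" rose from {0.3, 0.2} to 0.5, so
    # after negation it is worse than every previous attempt.
    current = {"improvement(-)": 0.1, "regression(-)": 0.5}
    previous = [
        {"improvement(-)": 0.2, "regression(-)": 0.3},
        {"improvement(-)": 0.3, "regression(-)": 0.2},
    ]
    for name, raw in current.items():
        norm = -raw  # both metrics here end with "(-)"
        improved = all(norm > -p[name] for p in previous)
        print(name, "improved" if improved else "regressed")
    # prints: improvement(-) improved
    #         regression(-) regressed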