Skip to content

Commit f6a7d7b

Browse files
committed
Added metric minimization/maximization option and a test case
1 parent 079c8d8 commit f6a7d7b

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

openevolve/prompt/sampler.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,16 @@ def _identify_improvement_areas(
186186
# Only compare numeric metrics
187187
if not isinstance(value, (int, float)) or isinstance(value, bool):
188188
continue
189+
190+
# A metric whose name ends with "(-)" is one we want to minimize, so negate its value before comparison
191+
if metric.endswith("(-)"):
192+
value = -value
189193

190194
improved = True
191195
regressed = True
192196

193197
for attempt in recent_attempts:
194-
attempt_value = attempt["metrics"].get(metric, 0)
198+
attempt_value = -attempt["metrics"].get(metric, 0) if metric.endswith("(-)") else attempt["metrics"].get(metric, 0)
195199
# Only compare if both values are numeric
196200
if isinstance(value, (int, float)) and isinstance(attempt_value, (int, float)):
197201
if attempt_value <= value:

tests/test_prompt_sampler.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,61 @@ def test_build_prompt(self):
4848
self.assertIn("def test(): pass", prompt["user"])
4949
self.assertIn("score: 0.5", prompt["user"])
5050

51+
def test_metric_minimization_feature(self):
52+
"""Test that metrics whose names end with '(-)' are handled correctly for minimization"""
53+
current_program = "def test(): pass"
54+
parent_program = "def test(): pass"
55+
56+
# Test with both regular and minimization metrics
57+
program_metrics = {
58+
"improvement" : 0.3,
59+
"improvement(-)" : 0.1,
60+
"mixed" : 0.3,
61+
"mixed(-)" : 0.3,
62+
"regression" : 0.1,
63+
"regression(-)" : 0.5,
64+
}
65+
66+
# Create previous programs with different metric values to test comparison logic
67+
previous_programs = [
68+
{
69+
"id": "prev1",
70+
"code": "def prev1(): pass",
71+
"metrics": {
72+
"improvement" : 0.1,
73+
"improvement(-)" : 0.2,
74+
"mixed" : 0.1,
75+
"mixed(-)" : 0.5,
76+
"regression" : 0.5,
77+
"regression(-)" : 0.3,
78+
},
79+
},
80+
{
81+
"id": "prev2",
82+
"code": "def prev2(): pass",
83+
"metrics": {
84+
"improvement" : 0.2,
85+
"improvement(-)" : 0.3,
86+
"mixed" : 0.5,
87+
"mixed(-)" : 0.1,
88+
"regression" : 0.7,
89+
"regression(-)" : 0.2,
90+
},
91+
}
92+
]
5193

94+
response = self.prompt_sampler._identify_improvement_areas(
95+
current_program=current_program,
96+
parent_program=parent_program,
97+
metrics=program_metrics,
98+
previous_programs=previous_programs
99+
)
100+
expected_response = [
101+
"- Metrics showing improvement: improvement, improvement(-). Consider continuing with similar changes.",
102+
"- Metrics showing regression: regression, regression(-). Consider reverting or revising recent changes in these areas."
103+
]
104+
expected_response = "\n".join(expected_response)
105+
self.assertEqual(response, expected_response)
106+
52107
if __name__ == "__main__":
53108
unittest.main()

0 commit comments

Comments
 (0)