openevolve/prompt/sampler.py (6 changes: 5 additions & 1 deletion)
@@ -186,12 +186,16 @@ def _identify_improvement_areas(
             # Only compare numeric metrics
             if not isinstance(value, (int, float)) or isinstance(value, bool):
                 continue
 
+            # if the metric name ends with "(-)", it is a metric we want to minimize
+            if metric.endswith("(-)"):
+                value = -value
+
             improved = True
             regressed = True
 
             for attempt in recent_attempts:
-                attempt_value = attempt["metrics"].get(metric, 0)
+                attempt_value = -attempt["metrics"].get(metric, 0) if metric.endswith("(-)") else attempt["metrics"].get(metric, 0)
                 # Only compare if both values are numeric
                 if isinstance(value, (int, float)) and isinstance(attempt_value, (int, float)):
                     if attempt_value <= value:
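For readers skimming the hunk, here is a minimal standalone sketch of the convention this change introduces. The helper name normalize_metric is hypothetical and not part of the PR; the point is that negating a metric whose name ends in "(-)" lets a single "larger is better" comparison handle both metric directions.

    # Hypothetical sketch of the "(-)" convention, not code from this PR.
    def normalize_metric(name: str, value: float) -> float:
        """Return a value where larger always means better.

        Metrics whose names end with "(-)" are minimized, so their
        values are negated before any comparison.
        """
        return -value if name.endswith("(-)") else value

    # A minimized metric that drops from 0.5 to 0.3 now registers as better.
    assert normalize_metric("loss(-)", 0.3) > normalize_metric("loss(-)", 0.5)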
tests/test_prompt_sampler.py (55 changes: 55 additions & 0 deletions)
@@ -48,6 +48,61 @@ def test_build_prompt(self):
         self.assertIn("def test(): pass", prompt["user"])
         self.assertIn("score: 0.5", prompt["user"])
 
+    def test_metric_minimization_feature(self):
+        """Test that metrics ending with '(-)' are handled correctly for minimization"""
+        current_program = "def test(): pass"
+        parent_program = "def test(): pass"
+
+        # Test with both regular and minimization metrics
+        program_metrics = {
+            "improvement": 0.3,
+            "improvement(-)": 0.1,
+            "mixed": 0.3,
+            "mixed(-)": 0.3,
+            "regression": 0.1,
+            "regression(-)": 0.5,
+        }
+
+        # Create previous programs with different metric values to test comparison logic
+        previous_programs = [
+            {
+                "id": "prev1",
+                "code": "def prev1(): pass",
+                "metrics": {
+                    "improvement": 0.1,
+                    "improvement(-)": 0.2,
+                    "mixed": 0.1,
+                    "mixed(-)": 0.5,
+                    "regression": 0.5,
+                    "regression(-)": 0.3,
+                },
+            },
+            {
+                "id": "prev2",
+                "code": "def prev2(): pass",
+                "metrics": {
+                    "improvement": 0.2,
+                    "improvement(-)": 0.3,
+                    "mixed": 0.5,
+                    "mixed(-)": 0.1,
+                    "regression": 0.7,
+                    "regression(-)": 0.2,
+                },
+            },
+        ]
+
+        response = self.prompt_sampler._identify_improvement_areas(
+            current_program=current_program,
+            parent_program=parent_program,
+            metrics=program_metrics,
+            previous_programs=previous_programs,
+        )
+        expected_response = [
+            "- Metrics showing improvement: improvement, improvement(-). Consider continuing with similar changes.",
+            "- Metrics showing regression: regression, regression(-). Consider reverting or revising recent changes in these areas.",
+        ]
+        expected_response = "\n".join(expected_response)
+        self.assertEqual(response, expected_response)
 
 if __name__ == "__main__":
     unittest.main()
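As a sanity check on the fixture values above, here is a standalone sketch (not part of the test file) showing that the two "(-)" metrics classify exactly as the expected strings assert once their values are negated.

    # Standalone sanity check of the fixture above, not code from this PR.
    # "improvement(-)" fell from {0.2, 0.3} to 0.1, so after negation it beats
    # every previous attempt; "regression(-)" rose from {0.3, 0.2} to 0.5, so
    # after negation it is worse than every previous attempt.
    current = {"improvement(-)": 0.1, "regression(-)": 0.5}
    previous = [
        {"improvement(-)": 0.2, "regression(-)": 0.3},
        {"improvement(-)": 0.3, "regression(-)": 0.2},
    ]
    for name, raw in current.items():
        norm = -raw  # both metrics here end with "(-)"
        improved = all(norm > -p[name] for p in previous)
        print(name, "improved" if improved else "regressed")
    # prints: improvement(-) improved
    #         regression(-) regressed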