Updated fit, score, and predict to pass datasets under the generic argument names `X` and `y`.

Alessandro Lucantonio · Alessandro Lucantonio · commit 118a52a30de7 · 2025-03-07T14:02:49.000+01:00
diff --git a/environment.yaml b/environment.yaml
@@ -27,3 +27,4 @@ dependencies:
       - tox
       - mygrad
       - tox-conda
+      - pmlb
diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py
@@ -414,9 +414,9 @@ def mapper(f, individuals, toolbox_ref):
 
     def fit(self, X_train, y_train=None, X_val=None, y_val=None):
         """Fits the training data using GP-based symbolic regression."""
-        train_data = {"X_train": X_train, "y_train": y_train}
+        train_data = {"X": X_train, "y": y_train}
         if self.validate and X_val is not None:
-            val_data = {"X_val": X_val, "y_val": y_val}
+            val_data = {"X": X_val, "y": y_val}
             datasets = {"train": train_data, "val": val_data}
         else:
             datasets = {"train": train_data}
@@ -427,7 +427,7 @@ def fit(self, X_train, y_train=None, X_val=None, y_val=None):
         self.__run()
 
     def predict(self, X_test):
-        test_data = {"X_test": X_test}
+        test_data = {"X": X_test}
         datasets = {"test": test_data}
         self.__store_datasets(datasets)
         self.__register_predict_func()
@@ -438,7 +438,7 @@ def score(self, X_test, y_test):
         """Computes the error metric (passed to the `GPSymbolicRegressor` constructor)
         on a given dataset.
         """
-        test_data = {"X_test": X_test, "y_test": y_test}
+        test_data = {"X": X_test, "y": y_test}
         datasets = {"test": test_data}
         self.__store_datasets(datasets)
         self.__register_score_func()
diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py
@@ -39,38 +39,38 @@ def eval_MSE_sol(individual, X, y):
 
 
 @ray.remote
-def predict(individuals_str, toolbox, X_test):
+def predict(individuals_str, toolbox, X):
 
     callables = compile_individuals(toolbox, individuals_str)
 
     u = [None] * len(individuals_str)
 
     for i, ind in enumerate(callables):
-        _, u[i] = eval_MSE_sol(ind, X_test, None)
+        _, u[i] = eval_MSE_sol(ind, X, None)
 
     return u
 
 
 @ray.remote
-def score(individuals_str, toolbox, X_test, y_test):
+def score(individuals_str, toolbox, X, y):
 
     callables = compile_individuals(toolbox, individuals_str)
 
     MSE = [None] * len(individuals_str)
 
     for i, ind in enumerate(callables):
-        MSE[i], _ = eval_MSE_sol(ind, X_test, y_test)
+        MSE[i], _ = eval_MSE_sol(ind, X, y)
 
     return MSE
 
 
 @ray.remote
-def fitness(individuals_str, toolbox, X_train, y_train):
+def fitness(individuals_str, toolbox, X, y):
     callables = compile_individuals(toolbox, individuals_str)
 
     fitnesses = [None] * len(individuals_str)
     for i, ind in enumerate(callables):
-        MSE, _ = eval_MSE_sol(ind, X_train, y_train)
+        MSE, _ = eval_MSE_sol(ind, X, y)
 
         fitnesses[i] = (MSE,)
 
diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py
@@ -37,7 +37,7 @@ def eval_MSE_sol(
     residual: Callable, X, y, S: SimplicialComplex, u_0: C.CochainP0
 ) -> float:
 
-    num_nodes = X.shape[1]
+    num_nodes = S.num_nodes
 
     # need to call config again before using JAX in energy evaluations to make sure that
     # the current worker has initialized JAX
@@ -57,47 +57,48 @@ def obj(x, y):
 
     MSE = 0.0
 
-    u = []
+    us = []
 
-    for i, curr_y in enumerate(y):
+    for i, curr_force in enumerate(X):
         # set additional arguments of the objective function
         # (apart from the vector of unknowns)
-        args = {"y": curr_y}
+        args = {"y": curr_force}
         prb.set_obj_args(args)
 
         # minimize the objective
-        x = prb.solve(
+        u = prb.solve(
             x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000
         )
 
-        if (
-            prb.last_opt_result == 1
-            or prb.last_opt_result == 3
-            or prb.last_opt_result == 4
-        ):
+        if y is not None:
+            if (
+                prb.last_opt_result == 1
+                or prb.last_opt_result == 3
+                or prb.last_opt_result == 4
+            ):
 
-            current_err = np.linalg.norm(x - X[i, :]) ** 2
-        else:
-            current_err = math.nan
+                current_err = np.linalg.norm(u - y[i, :]) ** 2
+            else:
+                current_err = math.nan
 
-        if math.isnan(current_err):
-            MSE = 1e5
-            break
+            if math.isnan(current_err):
+                MSE = 1e5
+                break
 
-        MSE += current_err
+            MSE += current_err
 
-        u.append(x)
+        us.append(u)
 
-    MSE *= 1 / X.shape[0]
+    MSE *= 1 / num_nodes
 
-    return MSE, u
+    return MSE, us
 
 
 @ray.remote
 def predict(
     individuals_str: list[str],
     toolbox,
-    X_test,
+    X,
     S: SimplicialComplex,
     u_0: C.CochainP0,
     penalty: dict,
@@ -108,7 +109,7 @@ def predict(
     u = [None] * len(individuals_str)
 
     for i, ind in enumerate(callables):
-        _, u[i] = eval_MSE_sol(ind, X_test, None, S, u_0)
+        _, u[i] = eval_MSE_sol(ind, X, None, S, u_0)
 
     return u
 
@@ -117,8 +118,8 @@ def predict(
 def score(
     individuals_str: list[str],
     toolbox,
-    X_test,
-    y_test,
+    X,
+    y,
     S: SimplicialComplex,
     u_0: C.CochainP0,
     penalty: dict,
@@ -129,7 +130,7 @@ def score(
     MSE = [None] * len(individuals_str)
 
     for i, ind in enumerate(callables):
-        MSE[i], _ = eval_MSE_sol(ind, X_test, y_test, S, u_0)
+        MSE[i], _ = eval_MSE_sol(ind, X, y, S, u_0)
 
     return MSE
 
@@ -138,8 +139,8 @@ def score(
 def fitness(
     individuals_str: list[str],
     toolbox,
-    X_train,
-    y_train,
+    X,
+    y,
     S: SimplicialComplex,
     u_0: C.CochainP0,
     penalty: dict,
@@ -150,7 +151,7 @@ def fitness(
 
     fitnesses = [None] * len(individuals_str)
     for i, ind in enumerate(callables):
-        MSE, _ = eval_MSE_sol(ind, X_train, y_train, S, u_0)
+        MSE, _ = eval_MSE_sol(ind, X, y, S, u_0)
 
         # add penalty on length of the tree to promote simpler solutions
         fitnesses[i] = (MSE + penalty["reg_param"] * indlen[i],)
@@ -181,8 +182,9 @@ def test_poisson1d(set_test_dir, yamlfile):
     # Delta u + f = 0, where Delta is the discrete Laplace-de Rham operator
     f = C.laplacian(u)
     f.coeffs *= -1.0
-    X_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype)
-    y_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype)
+
+    X_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype)
+    y_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype)
 
     # initial guess for the unknown of the Poisson problem (cochain of nodals values)
     u_0_vec = np.zeros(num_nodes, dtype=dctkit.float_dtype)
@@ -219,15 +221,15 @@ def test_poisson1d(set_test_dir, yamlfile):
         **regressor_params
     )
 
-    train_data = Dataset("D", X_train, y_train)
+    # train_data = Dataset("D", X_train, y_train)
 
     gpsr.fit(X_train, y_train, X_val=X_train, y_val=y_train)
 
     u_best = gpsr.predict(X_train)
 
     fit_score = gpsr.score(X_train, y_train)
 
-    gpsr.save_best_test_sols(train_data, "./")
+    # gpsr.save_best_test_sols(train_data, "./")
 
     ray.shutdown()
     assert np.allclose(u.coeffs.flatten(), np.ravel(u_best))

-Original file line number
+Diff line change
       - tox
       - mygrad
       - tox-conda
 +      - pmlb