From 10872209f9891030d91051313b714268118d687e Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 5 Mar 2025 11:20:39 +0100 Subject: [PATCH 01/23] Fixed issues with setting CPU device for JAX-based fitness evals. --- environment.yaml | 5 +- examples/poisson.ipynb | 256 ----------------------------------------- tests/test_basic_sr.py | 44 ++++--- 3 files changed, 33 insertions(+), 272 deletions(-) delete mode 100644 examples/poisson.ipynb diff --git a/environment.yaml b/environment.yaml index 3327f64..c77f8b0 100644 --- a/environment.yaml +++ b/environment.yaml @@ -4,11 +4,11 @@ channels: - defaults dependencies: - gmsh - - jax + - jax==0.5.0 - jaxopt - numpy - pygmo - - python + - python==3.12 - python-gmsh - trame - ipywidgets @@ -26,3 +26,4 @@ dependencies: - pygmsh - tox - mygrad + - tox-conda diff --git a/examples/poisson.ipynb b/examples/poisson.ipynb deleted file mode 100644 index 428d630..0000000 --- a/examples/poisson.ipynb +++ /dev/null @@ -1,256 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dctkit.dec import cochain as C\n", - "from dctkit.mesh.simplex import SimplicialComplex\n", - "from dctkit.mesh.util import generate_line_mesh, build_complex_from_mesh\n", - "from dctkit.math.opt import optctrl as oc\n", - "import matplotlib.pyplot as plt\n", - "from deap import gp\n", - "from alpine.gp import gpsymbreg as gps\n", - "from alpine.data import Dataset\n", - "from dctkit import config\n", - "import dctkit\n", - "import numpy as np\n", - "import ray\n", - "import math\n", - "import yaml\n", - "from typing import Tuple, Callable, List\n", - "import numpy.typing as npt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# choose precision and whether to use GPU or CPU\n", - "# needed for context of the plots at the end of the evolution\n", - "config()\n", - "\n", - "def eval_MSE_sol(residual: Callable, D: Dataset, S: SimplicialComplex, u_0: C.CochainP0) -> float:\n", - "\n", - " num_nodes = D.X.shape[1]\n", - "\n", - " # need to call config again before using JAX in energy evaluations to make sure that\n", - " # the current worker has initialized JAX\n", - " config()\n", - "\n", - "\n", - " # objective: squared norm of the residual of the equation + penalty on Dirichlet \n", - " # boundary condition on the first node\n", - " def obj(x, y):\n", - " penalty = 100.*x[0]**2\n", - " u = C.CochainP0(S, x)\n", - " f = C.CochainP0(S, y)\n", - " r = residual(u, f)\n", - " total_energy = C.inner(r, r) + penalty\n", - " return total_energy\n", - "\n", - " prb = oc.OptimizationProblem(dim=num_nodes, state_dim=num_nodes, objfun=obj)\n", - "\n", - " total_err = 0.\n", - "\n", - " best_sols = []\n", - "\n", - " for i, curr_y in enumerate(D.y):\n", - " # set additional arguments of the objective function (apart from the vector of unknowns)\n", - " args = {'y': curr_y}\n", - " prb.set_obj_args(args)\n", - "\n", - " # minimize the objective\n", - " x = prb.solve(x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000)\n", - "\n", - " if (prb.last_opt_result == 1 or prb.last_opt_result == 3\n", - " or prb.last_opt_result == 4):\n", - "\n", - " current_err = np.linalg.norm(x-D.X[i, :])**2\n", - " else:\n", - " current_err = math.nan\n", - "\n", - " if math.isnan(current_err):\n", - " total_err = 1e5\n", - " break\n", - "\n", - " total_err += current_err\n", - "\n", - " best_sols.append(x)\n", - "\n", - " total_err *= 1/D.X.shape[0]\n", - "\n", - " return total_err, best_sols" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@ray.remote\n", - "def predict(individuals_batch: list[gp.PrimitiveSetTyped], toolbox, D: Dataset,\n", - " S: SimplicialComplex, u_0: C.CochainP0, penalty: dict) -> List[npt.NDArray]:\n", - "\n", - " best_sols = [None]*len(individuals_batch)\n", - "\n", - " for i, individual in enumerate(individuals_batch):\n", - " callable = toolbox.compile(expr=individual)\n", - " _, best_sols[i] = eval_MSE_sol(callable, D, S, u_0)\n", - "\n", - " return best_sols" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@ray.remote\n", - "def fitness(individuals_batch: list[gp.PrimitiveSetTyped], toolbox, D: Dataset,\n", - " S: SimplicialComplex, u_0: C.CochainP0, penalty: dict) -> Tuple[float, ]:\n", - "\n", - " objvals = [None]*len(individuals_batch)\n", - "\n", - " for i, individual in enumerate(individuals_batch):\n", - " callable = toolbox.compile(expr=individual)\n", - " # add penalty on length of the tree to promote simpler solutions\n", - " objval, _ = eval_MSE_sol(callable, D, S, u_0) #+ penalty[\"reg_param\"]*indlen)\n", - " objvals[i] = (objval,)\n", - "\n", - " return objvals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def stgp_poisson():\n", - " with open(\"poisson.yaml\") as config_file:\n", - " config_file_data = yaml.safe_load(config_file)\n", - "\n", - " # generate mesh and dataset\n", - " mesh, _ = generate_line_mesh(num_nodes=11, L=1.)\n", - " S = build_complex_from_mesh(mesh)\n", - " S.get_hodge_star()\n", - " x = S.node_coords \n", - " num_nodes = S.num_nodes\n", - "\n", - " # generate training and test datasets\n", - " # exact solution = x² \n", - " u = C.CochainP0(S, np.array(x[:,0]**2, dtype=dctkit.float_dtype))\n", - " # compute source term such that u solves the discrete Poisson equation \n", - " # Delta u + f = 0, where Delta is the discrete Laplace-de Rham operator\n", - " f = C.laplacian(u)\n", - " f.coeffs *= -1.\n", - " X_train = np.array([u.coeffs.flatten()],dtype=dctkit.float_dtype)\n", - " y_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype)\n", - "\n", - " # initial guess for the unknown of the Poisson problem (cochain of nodals values)\n", - " u_0_vec = np.zeros(num_nodes, dtype=dctkit.float_dtype)\n", - " u_0 = C.CochainP0(S, u_0_vec)\n", - "\n", - " # define primitive set for the residual of the discrete Poisson equation\n", - " pset = gp.PrimitiveSetTyped(\"RESIDUAL\", [C.CochainP0, C.CochainP0], C.CochainP0)\n", - "\n", - " # rename arguments of the residual\n", - " pset.renameArguments(ARG0=\"u\")\n", - " pset.renameArguments(ARG1=\"f\")\n", - "\n", - " penalty = config_file_data[\"gp\"][\"penalty\"]\n", - " common_params = {'S': S, 'u_0': u_0, 'penalty': penalty}\n", - "\n", - " gpsr = gps.GPSymbolicRegressor(pset=pset, fitness=fitness.remote,\n", - " predict_func=predict.remote, common_data=common_params,\n", - " print_log=True, \n", - " config_file_data=config_file_data)\n", - "\n", - "\n", - "\n", - " train_data = Dataset(\"D\", X_train, y_train)\n", - " gpsr.fit(train_data)\n", - "\n", - " u_best = gpsr.predict(train_data)\n", - "\n", - " ray.shutdown()\n", - " plt.figure()\n", - " plt.plot(x[:,0], u.coeffs.flatten())\n", - " plt.plot(x[:,0], np.ravel(u_best), \"ro\")\n", - " plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "stgp_poisson()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index d8f7795..2676297 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -7,7 +7,8 @@ import jax.numpy as jnp import ray -# Define new functions +# sets CPU device for JAX at process level +os.environ["JAX_PLATFORMS"] = "cpu" config() @@ -16,16 +17,19 @@ def compile_individuals(toolbox, individuals_str_batch): return [toolbox.compile(expr=ind) for ind in individuals_str_batch] -x = jnp.array([x/10. for x in range(-10, 10)]) +x = jnp.array([x / 10.0 for x in range(-10, 10)]) y = x**4 + x**3 + x**2 + x def eval_MSE_sol(individual, true_data): + import os + + os.environ["JAX_PLATFORMS"] = "cpu" config() # Evaluate the mean squared error between the expression # and the real function : x**4 + x**3 + x**2 + x y_pred = individual(true_data.X) - MSE = jnp.sum(jnp.square(y_pred-true_data.y)) / len(true_data.X) + MSE = jnp.sum(jnp.square(y_pred - true_data.y)) / len(true_data.X) if jnp.isnan(MSE): MSE = 1e5 return MSE, y_pred @@ -36,7 +40,7 @@ def predict(individuals_str, toolbox, true_data): callables = compile_individuals(toolbox, individuals_str) - u = [None]*len(individuals_str) + u = [None] * len(individuals_str) for i, ind in enumerate(callables): _, u[i] = eval_MSE_sol(ind, true_data) @@ -49,7 +53,7 @@ def score(individuals_str, toolbox, true_data): callables = compile_individuals(toolbox, individuals_str) - MSE = [None]*len(individuals_str) + MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): MSE[i], _ = eval_MSE_sol(ind, true_data) @@ -61,7 +65,7 @@ def score(individuals_str, toolbox, true_data): def fitness(individuals_str, toolbox, true_data): callables = compile_individuals(toolbox, individuals_str) - fitnesses = [None]*len(individuals_str) + fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): MSE, _ = eval_MSE_sol(ind, true_data) @@ -76,18 +80,30 @@ def test_basic_sr(set_test_dir): with open(filename) as config_file: config_file_data = yaml.safe_load(config_file) - pset = gp.PrimitiveSetTyped("MAIN", [float,], float) + pset = gp.PrimitiveSetTyped( + "MAIN", + [ + float, + ], + float, + ) pset.addPrimitive(jnp.add, [float, float], float, "AddF") - pset.renameArguments(ARG0='x') + pset.renameArguments(ARG0="x") common_data = {} seed = [ - "AddF(AddF(AddF(MulF(MulF(x, MulF(x, x)),x), MulF(x,MulF(x, x))), MulF(x, x)), x)"] # noqa: E501 - gpsr = GPSymbolicRegressor(pset=pset, fitness=fitness.remote, - error_metric=score.remote, predict_func=predict.remote, - common_data=common_data, - config_file_data=config_file_data, - seed=seed, batch_size=10) + "AddF(AddF(AddF(MulF(MulF(x, MulF(x, x)),x), MulF(x,MulF(x, x))), MulF(x, x)), x)" + ] # noqa: E501 + gpsr = GPSymbolicRegressor( + pset=pset, + fitness=fitness.remote, + error_metric=score.remote, + predict_func=predict.remote, + common_data=common_data, + config_file_data=config_file_data, + seed=seed, + batch_size=10, + ) train_data = Dataset("true_data", x, y) gpsr.fit(train_data) From 1be8be67d364baf6ecbc002d1976302722bbed3d Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 5 Mar 2025 12:48:17 +0100 Subject: [PATCH 02/23] Introducing parameters as arguments of the regressor class. --- examples/simple_sr_noyaml.py | 155 +++++++++++++++++++++++++++++++++++ src/alpine/gp/gpsymbreg.py | 114 ++++++++++++++++---------- src/alpine/gp/util.py | 38 +++++++-- tests/test_basic_sr.py | 1 + 4 files changed, 256 insertions(+), 52 deletions(-) create mode 100644 examples/simple_sr_noyaml.py diff --git a/examples/simple_sr_noyaml.py b/examples/simple_sr_noyaml.py new file mode 100644 index 0000000..4b64e27 --- /dev/null +++ b/examples/simple_sr_noyaml.py @@ -0,0 +1,155 @@ +from deap import gp +from alpine.gp.gpsymbreg import GPSymbolicRegressor +from alpine.data import Dataset +import numpy as np +import ray +import warnings +import re +from alpine.gp import util + + +def compile_individuals(toolbox, individuals_str_batch): + return [toolbox.compile(expr=ind) for ind in individuals_str_batch] + + +# Ground truth +x = np.array([x / 10.0 for x in range(-10, 10)]) +y = x**4 + x**3 + x**2 + x + + +def check_trig_fn(ind): + return len(re.findall("cos", str(ind))) + len(re.findall("sin", str(ind))) + + +def check_nested_trig_fn(ind): + return util.detect_nested_trigonometric_functions(str(ind)) + + +def get_features_batch( + individuals_str_batch, + individ_feature_extractors=[len, check_nested_trig_fn, check_trig_fn], +): + features_batch = [ + [fe(i) for i in individuals_str_batch] for fe in individ_feature_extractors + ] + + individ_length = features_batch[0] + nested_trigs = features_batch[1] + num_trigs = features_batch[2] + return individ_length, nested_trigs, num_trigs + + +def eval_MSE_sol(individual, true_data): + warnings.filterwarnings("ignore") + + y_pred = individual(true_data.X) + MSE = np.mean(np.square(y_pred - true_data.y)) + if np.isnan(MSE): + MSE = 1e5 + return MSE, y_pred + + +@ray.remote +def predict(individuals_str, toolbox, true_data, penalty): + + callables = compile_individuals(toolbox, individuals_str) + + u = [None] * len(individuals_str) + + for i, ind in enumerate(callables): + _, u[i] = eval_MSE_sol(ind, true_data) + + return u + + +@ray.remote +def score(individuals_str, toolbox, true_data, penalty): + + callables = compile_individuals(toolbox, individuals_str) + + MSE = [None] * len(individuals_str) + + for i, ind in enumerate(callables): + MSE[i], _ = eval_MSE_sol(ind, true_data) + + return MSE + + +@ray.remote +def fitness(individuals_str, toolbox, true_data, penalty): + callables = compile_individuals(toolbox, individuals_str) + + individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) + + fitnesses = [None] * len(individuals_str) + for i, ind in enumerate(callables): + if individ_length[i] >= 50: + fitnesses[i] = (1e8,) + else: + MSE, _ = eval_MSE_sol(ind, true_data) + + fitnesses[i] = ( + MSE + + 100000 * nested_trigs[i] + + penalty["reg_param"] * individ_length[i], + ) + + return fitnesses + + +def main(): + + pset = gp.PrimitiveSetTyped( + "MAIN", + [ + float, + ], + float, + ) + pset.renameArguments(ARG0="x") + + primitives = { + "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, + "used": [ + {"name": "add", "dimension": None, "rank": None}, + {"name": "sub", "dimension": None, "rank": None}, + {"name": "mul", "dimension": None, "rank": None}, + {"name": "div", "dimension": None, "rank": None}, + {"name": "sin", "dimension": None, "rank": None}, + {"name": "cos", "dimension": None, "rank": None}, + {"name": "exp", "dimension": None, "rank": None}, + {"name": "log", "dimension": None, "rank": None}, + ], + } + + pset = util.add_primitives_to_pset_from_dict(pset, primitives) + + penalty = {"reg_param": 0.0} + common_data = {"penalty": penalty} + + gpsr = GPSymbolicRegressor( + pset=pset, + fitness=fitness.remote, + error_metric=score.remote, + predict_func=predict.remote, + common_data=common_data, + NINDIVIDUALS=100, + num_islands=10, + NGEN=200, + MUTPB=0.1, + min_height=2, + max_height=6, + crossover_prob=0.9, + overlapping_generation=True, + print_log=True, + batch_size=100, + ) + + train_data = Dataset("true_data", x, y) + gpsr.fit(train_data) + + ray.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/alpine/gp/gpsymbreg.py b/src/alpine/gp/gpsymbreg.py index a2756c5..993eb1f 100644 --- a/src/alpine/gp/gpsymbreg.py +++ b/src/alpine/gp/gpsymbreg.py @@ -66,23 +66,38 @@ def __init__( self, pset: gp.PrimitiveSet | gp.PrimitiveSetTyped, fitness: Callable, - error_metric: Callable | None = None, - predict_func: Callable | None = None, - common_data: Dict | None = None, - toolbox: base.Toolbox = None, - individualCreator: gp.PrimitiveTree = None, + select_fun: str = "tools.selection.tournament_with_elitism", + select_args: str = "{'num_elitist': self.n_elitist, 'tournsize': 3, 'stochastic_tourn': { 'enabled': False, 'prob': [0.8, 0.2] }}", + mut_fun: str = "gp.mutUniform", + mut_args: str = "{'expr': self.toolbox.expr_mut, 'pset': self.pset}", + expr_mut_fun: str = "gp.genHalfAndHalf", + expr_mut_args: str = "{'min_': 1, 'max_': 3}", + crossover_fun: str = "gp.cxOnePoint", + crossover_args: str = "{}", + min_height: int = 1, + max_height: int = 3, NINDIVIDUALS: int = 10, NGEN: int = 1, num_islands: int = 1, + mig_freq: int = 10, + mig_frac: float = 0.05, crossover_prob: float = 0.5, MUTPB: float = 0.2, frac_elitist: float = 0.0, overlapping_generation: bool = False, + immigration_enabled: bool = False, + immigration_freq: int = 0, + immigration_frac: float = 0.0, + error_metric: Callable | None = None, + predict_func: Callable | None = None, + common_data: Dict | None = None, + # toolbox: base.Toolbox = None, + # individualCreator: gp.PrimitiveTree = None, validate: bool = False, preprocess_func: Callable | None = None, callback_func: Callable | None = None, seed: List[str] | None = None, - config_file_data: Dict | None = None, + # config_file_data: Dict | None = None, plot_history: bool = False, print_log: bool = False, num_best_inds_str: int = 1, @@ -124,24 +139,38 @@ def __init__( # FIXME: does everything work when the functions do not have common args? self.store_fit_error_common_args(common_data) - if config_file_data is not None: - self.__load_config_data(config_file_data) - else: - self.NINDIVIDUALS = NINDIVIDUALS - self.NGEN = NGEN - self.num_islands = num_islands - self.crossover_prob = crossover_prob - self.MUTPB = MUTPB - - self.overlapping_generation = overlapping_generation - self.validate = validate - - # Elitism settings - self.n_elitist = int(frac_elitist * self.NINDIVIDUALS) - - self.createIndividual = individualCreator - - self.toolbox = toolbox + self.NINDIVIDUALS = NINDIVIDUALS + self.NGEN = NGEN + self.num_islands = num_islands + self.crossover_prob = crossover_prob + self.MUTPB = MUTPB + self.select_fun = select_fun + self.select_args = select_args + self.mut_fun = mut_fun + self.mut_args = mut_args + self.expr_mut_fun = expr_mut_fun + self.expr_mut_args = expr_mut_args + self.crossover_fun = crossover_fun + self.crossover_args = crossover_args + self.min_height = min_height + self.max_height = max_height + self.mig_freq = mig_freq + self.mig_frac = mig_frac + self.immigration_enabled = immigration_enabled + self.immigration_frac = immigration_frac + self.immigration_freq = immigration_freq + + self.overlapping_generation = overlapping_generation + self.validate = validate + + # Elitism settings + self.n_elitist = int(frac_elitist * self.NINDIVIDUALS) + + # config individual creator and toolbox + self.__creator_toolbox_config() + # self.createIndividual = individualCreator + + # self.toolbox = toolbox self.seed = seed @@ -176,31 +205,24 @@ def __init__( self.plot_initialized = False self.fig_id = 0 - def __creator_toolbox_config(self, config_file_data: Dict): + def __creator_toolbox_config(self): """Initialize toolbox and individual creator based on config file.""" self.toolbox = base.Toolbox() # SELECTION - select_fun = eval(config_file_data["gp"]["select"]["fun"]) - select_args = eval(config_file_data["gp"]["select"]["kargs"]) - self.toolbox.register("select", select_fun, **select_args) + self.toolbox.register("select", eval(self.select_fun), **eval(self.select_args)) # MUTATION - expr_mut_fun = config_file_data["gp"]["mutate"]["expr_mut"] - expr_mut_kargs = eval(config_file_data["gp"]["mutate"]["expr_mut_kargs"]) - - self.toolbox.register("expr_mut", eval(expr_mut_fun), **expr_mut_kargs) - - mutate_fun = config_file_data["gp"]["mutate"]["fun"] - mutate_kargs = eval(config_file_data["gp"]["mutate"]["kargs"]) + self.toolbox.register( + "expr_mut", eval(self.expr_mut_fun), **eval(self.expr_mut_args) + ) - self.toolbox.register("mutate", eval(mutate_fun), **mutate_kargs) + self.toolbox.register("mutate", eval(self.mut_fun), **eval(self.mut_args)) # CROSSOVER - crossover_fun = config_file_data["gp"]["crossover"]["fun"] - crossover_kargs = eval(config_file_data["gp"]["crossover"]["kargs"]) - - self.toolbox.register("mate", eval(crossover_fun), **crossover_kargs) + self.toolbox.register( + "mate", eval(self.crossover_fun), **eval(self.crossover_args) + ) self.toolbox.decorate( "mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17) ) @@ -209,17 +231,19 @@ def __creator_toolbox_config(self, config_file_data: Dict): ) # INDIVIDUAL GENERATOR/CREATOR - min_ = config_file_data["gp"]["min_"] - max_ = config_file_data["gp"]["max_"] self.toolbox.register( - "expr", gp.genHalfAndHalf, pset=self.pset, min_=min_, max_=max_ + "expr", + gp.genHalfAndHalf, + pset=self.pset, + min_=self.min_height, + max_=self.max_height, ) self.toolbox.register( "expr_pop", gp.genHalfAndHalf, pset=self.pset, - min_=min_, - max_=max_, + min_=self.min_height, + max_=self.max_height, is_pop=True, ) creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index caf1c9c..13cbb85 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -1,30 +1,54 @@ +from .primitives import add_primitives_to_pset +from importlib import import_module + + +def add_primitives_to_pset_from_dict(pset, primitives_dict): + primitives_collection = dict() + imports = primitives_dict["imports"].items() + + for module_name, function_names in imports: + module = import_module(module_name) + for function_name in function_names: + primitive = getattr(module, function_name) + primitives_collection = primitives_collection | primitive + + add_primitives_to_pset( + pset, + primitives_dict["used"], + primitives_collection, + ) + + return pset + + def detect_nested_trigonometric_functions(equation): # List of trigonometric functions - trig_functions = ['sin', 'cos'] + trig_functions = ["sin", "cos"] nested = 0 # Flag to indicate if nested functions are found function_depth = 0 # Track depth within trigonometric function calls i = 0 while i < len(equation) and not nested: # Look for trigonometric function - trig_found = any(equation[i:i+len(trig)].lower() == - trig for trig in trig_functions) + trig_found = any( + equation[i : i + len(trig)].lower() == trig for trig in trig_functions + ) if trig_found: # If a trig function is found, look for its opening parenthesis j = i - while j < len(equation) and equation[j] not in ['(', ' ']: + while j < len(equation) and equation[j] not in ["(", " "]: j += 1 - if j < len(equation) and equation[j] == '(': + if j < len(equation) and equation[j] == "(": if function_depth > 0: # We are already inside a trig function, this is a nested trig # function nested = 1 function_depth += 1 i = j # Move i to the position of '(' - elif equation[i] == '(' and function_depth > 0: + elif equation[i] == "(" and function_depth > 0: # Increase depth if we're already in a trig function function_depth += 1 - elif equation[i] == ')': + elif equation[i] == ")": if function_depth > 0: # Leaving a trigonometric function or nested parentheses function_depth -= 1 diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 2676297..5f5ca94 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -94,6 +94,7 @@ def test_basic_sr(set_test_dir): seed = [ "AddF(AddF(AddF(MulF(MulF(x, MulF(x, x)),x), MulF(x,MulF(x, x))), MulF(x, x)), x)" ] # noqa: E501 + gpsr = GPSymbolicRegressor( pset=pset, fitness=fitness.remote, From 1b1d5ce4fb243fa5354ec119156a73c9c77d6aff Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 5 Mar 2025 15:01:42 +0100 Subject: [PATCH 03/23] Fixing examples. --- examples/simple_sr.py | 11 +++++++---- src/alpine/gp/gpsymbreg.py | 40 -------------------------------------- src/alpine/gp/util.py | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/examples/simple_sr.py b/examples/simple_sr.py index b232c46..a802f12 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -1,4 +1,3 @@ -import yaml import os from deap import gp from alpine.gp.gpsymbreg import GPSymbolicRegressor @@ -102,8 +101,8 @@ def fitness(individuals_str, toolbox, true_data, penalty): def main(): yamlfile = "simple_sr.yaml" filename = os.path.join(os.path.dirname(__file__), yamlfile) - with open(filename) as config_file: - config_file_data = yaml.safe_load(config_file) + + regressor_params, config_file_data = util.load_config_data(filename) pset = gp.PrimitiveSetTyped( "MAIN", @@ -114,6 +113,10 @@ def main(): ) pset.renameArguments(ARG0="x") + pset = util.add_primitives_to_pset_from_dict( + pset, config_file_data["gp"]["primitives"] + ) + penalty = config_file_data["gp"]["penalty"] common_data = {"penalty": penalty} @@ -123,9 +126,9 @@ def main(): error_metric=score.remote, predict_func=predict.remote, common_data=common_data, - config_file_data=config_file_data, print_log=True, batch_size=100, + **regressor_params ) train_data = Dataset("true_data", x, y) diff --git a/src/alpine/gp/gpsymbreg.py b/src/alpine/gp/gpsymbreg.py index 993eb1f..3476c71 100644 --- a/src/alpine/gp/gpsymbreg.py +++ b/src/alpine/gp/gpsymbreg.py @@ -33,8 +33,6 @@ class GPSymbolicRegressor: pset: set of primitives and terminals (loosely or strongly typed). predict_func: function that returns a prediction given an individual and a test `Dataset` as inputs. - toolbox: set to None if `config_file_data` is provided. - individualCreator: set to None if `config_file_data` is provided. NINDIVIDUALS: number of individuals in the parent population. NGEN: number of generations. num_islands: number of islands (for a multi-island model). @@ -91,13 +89,10 @@ def __init__( error_metric: Callable | None = None, predict_func: Callable | None = None, common_data: Dict | None = None, - # toolbox: base.Toolbox = None, - # individualCreator: gp.PrimitiveTree = None, validate: bool = False, preprocess_func: Callable | None = None, callback_func: Callable | None = None, seed: List[str] | None = None, - # config_file_data: Dict | None = None, plot_history: bool = False, print_log: bool = False, num_best_inds_str: int = 1, @@ -260,41 +255,6 @@ def __creator_toolbox_config(self): self.createIndividual = createIndividual - def __load_config_data(self, config_file_data: Dict): - """Load problem settings from YAML file.""" - self.NINDIVIDUALS = config_file_data["gp"]["NINDIVIDUALS"] - self.NGEN = config_file_data["gp"]["NGEN"] - self.num_islands = config_file_data["gp"]["multi_island"]["num_islands"] - self.mig_freq = config_file_data["gp"]["multi_island"]["migration"]["freq"] - self.mig_frac = config_file_data["gp"]["multi_island"]["migration"]["frac"] - self.crossover_prob = config_file_data["gp"]["crossover_prob"] - self.MUTPB = config_file_data["gp"]["MUTPB"] - self.n_elitist = int(config_file_data["gp"]["frac_elitist"] * self.NINDIVIDUALS) - self.overlapping_generation = config_file_data["gp"]["overlapping_generation"] - - # generate primitives collection - primitives_collection = dict() - imports = config_file_data["gp"]["primitives"]["imports"].items() - for module_name, function_names in imports: - module = import_module(module_name) - for function_name in function_names: - primitive = getattr(module, function_name) - primitives_collection = primitives_collection | primitive - - add_primitives_to_pset( - self.pset, - config_file_data["gp"]["primitives"]["used"], - primitives_collection, - ) - - self.__creator_toolbox_config(config_file_data=config_file_data) - - self.validate = config_file_data["gp"]["validate"] - - self.immigration_enabled = config_file_data["gp"]["immigration"]["enabled"] - self.immigration_freq = config_file_data["gp"]["immigration"]["freq"] - self.immigration_frac = config_file_data["gp"]["immigration"]["frac"] - def store_fit_error_common_args(self, data: Dict): """Store names and values of the arguments that are in common between the fitness and the error metric functions in the common object space. diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index 13cbb85..a468040 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -1,3 +1,4 @@ +import yaml from .primitives import add_primitives_to_pset from importlib import import_module @@ -21,6 +22,41 @@ def add_primitives_to_pset_from_dict(pset, primitives_dict): return pset +def load_config_data(filename): + """Load problem settings from YAML file.""" + with open(filename) as config_file: + config_file_data = yaml.safe_load(config_file) + + regressor_params = dict() + regressor_params["NINDIVIDUALS"] = config_file_data["gp"]["NINDIVIDUALS"] + regressor_params["NGEN"] = config_file_data["gp"]["NGEN"] + regressor_params["num_islands"] = config_file_data["gp"]["multi_island"][ + "num_islands" + ] + regressor_params["mig_freq"] = config_file_data["gp"]["multi_island"]["migration"][ + "freq" + ] + regressor_params["mig_frac"] = config_file_data["gp"]["multi_island"]["migration"][ + "frac" + ] + regressor_params["crossover_prob"] = config_file_data["gp"]["crossover_prob"] + regressor_params["MUTPB"] = config_file_data["gp"]["MUTPB"] + regressor_params["frac_elitist"] = config_file_data["gp"]["frac_elitist"] + regressor_params["overlapping_generation"] = config_file_data["gp"][ + "overlapping_generation" + ] + + regressor_params["validate"] = config_file_data["gp"]["validate"] + + regressor_params["immigration_enabled"] = config_file_data["gp"]["immigration"][ + "enabled" + ] + regressor_params["immigration_freq"] = config_file_data["gp"]["immigration"]["freq"] + regressor_params["immigration_frac"] = config_file_data["gp"]["immigration"]["frac"] + + return regressor_params, config_file_data + + def detect_nested_trigonometric_functions(equation): # List of trigonometric functions trig_functions = ["sin", "cos"] From 2f8cc6346847b0089a82b44b93e9e068a20e5dcf Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 5 Mar 2025 15:09:10 +0100 Subject: [PATCH 04/23] Fixed tests. --- src/alpine/gp/gpsymbreg.py | 4 +- tests/test_basic_sr.py | 16 ++++-- tests/test_poisson1d.py | 111 ++++++++++++++++++++++++------------- 3 files changed, 84 insertions(+), 47 deletions(-) diff --git a/src/alpine/gp/gpsymbreg.py b/src/alpine/gp/gpsymbreg.py index 3476c71..bd0a376 100644 --- a/src/alpine/gp/gpsymbreg.py +++ b/src/alpine/gp/gpsymbreg.py @@ -6,13 +6,11 @@ from typing import List, Dict, Callable from os.path import join import networkx as nx -from .primitives import add_primitives_to_pset from alpine.data import Dataset import os import ray import random from itertools import chain -from importlib import import_module # reducing the number of threads launched by fitness evaluations os.environ["MKL_NUM_THREADS"] = "1" @@ -65,7 +63,7 @@ def __init__( pset: gp.PrimitiveSet | gp.PrimitiveSetTyped, fitness: Callable, select_fun: str = "tools.selection.tournament_with_elitism", - select_args: str = "{'num_elitist': self.n_elitist, 'tournsize': 3, 'stochastic_tourn': { 'enabled': False, 'prob': [0.8, 0.2] }}", + select_args: str = "{'num_elitist': self.n_elitist, 'tournsize': 3, 'stochastic_tourn': { 'enabled': False, 'prob': [0.8, 0.2] }}", # noqa: E501 mut_fun: str = "gp.mutUniform", mut_args: str = "{'expr': self.toolbox.expr_mut, 'pset': self.pset}", expr_mut_fun: str = "gp.genHalfAndHalf", diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 5f5ca94..97b5073 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -1,9 +1,9 @@ -import yaml import os from dctkit import config from deap import gp from alpine.gp.gpsymbreg import GPSymbolicRegressor from alpine.data import Dataset +from alpine.gp import util import jax.numpy as jnp import ray @@ -77,8 +77,8 @@ def fitness(individuals_str, toolbox, true_data): def test_basic_sr(set_test_dir): yamlfile = "test_basic_sr.yaml" filename = os.path.join(os.path.dirname(__file__), yamlfile) - with open(filename) as config_file: - config_file_data = yaml.safe_load(config_file) + + regressor_params, config_file_data = util.load_config_data(filename) pset = gp.PrimitiveSetTyped( "MAIN", @@ -90,10 +90,14 @@ def test_basic_sr(set_test_dir): pset.addPrimitive(jnp.add, [float, float], float, "AddF") pset.renameArguments(ARG0="x") + pset = util.add_primitives_to_pset_from_dict( + pset, config_file_data["gp"]["primitives"] + ) + common_data = {} seed = [ - "AddF(AddF(AddF(MulF(MulF(x, MulF(x, x)),x), MulF(x,MulF(x, x))), MulF(x, x)), x)" - ] # noqa: E501 + "AddF(AddF(AddF(MulF(MulF(x, MulF(x, x)),x), MulF(x,MulF(x, x))), MulF(x, x)), x)" # noqa: E501 + ] gpsr = GPSymbolicRegressor( pset=pset, @@ -101,9 +105,9 @@ def test_basic_sr(set_test_dir): error_metric=score.remote, predict_func=predict.remote, common_data=common_data, - config_file_data=config_file_data, seed=seed, batch_size=10, + **regressor_params ) train_data = Dataset("true_data", x, y) diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 6fa0d2a..287bd05 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -10,10 +10,10 @@ import numpy as np import ray import math -import yaml from typing import Tuple, Callable, List import os import pytest +from alpine.gp import util # choose precision and whether to use GPU or CPU # needed for context of the plots at the end of the evolution @@ -25,15 +25,17 @@ def compile_individuals(toolbox, individuals_str_batch): def get_features_batch(individ_feature_extractors, individuals_str_batch): - features_batch = [[fe(i) for i in individuals_str_batch] - for fe in individ_feature_extractors] + features_batch = [ + [fe(i) for i in individuals_str_batch] for fe in individ_feature_extractors + ] indlen = features_batch[0] return indlen -def eval_MSE_sol(residual: Callable, D: Dataset, S: SimplicialComplex, - u_0: C.CochainP0) -> float: +def eval_MSE_sol( + residual: Callable, D: Dataset, S: SimplicialComplex, u_0: C.CochainP0 +) -> float: num_nodes = D.X.shape[1] @@ -44,7 +46,7 @@ def eval_MSE_sol(residual: Callable, D: Dataset, S: SimplicialComplex, # objective: squared norm of the residual of the equation + penalty on Dirichlet # boundary condition on the first node def obj(x, y): - penalty = 100.*x[0]**2 + penalty = 100.0 * x[0] ** 2 u = C.CochainP0(S, x) f = C.CochainP0(S, y) r = residual(u, f) @@ -53,24 +55,28 @@ def obj(x, y): prb = oc.OptimizationProblem(dim=num_nodes, state_dim=num_nodes, objfun=obj) - MSE = 0. + MSE = 0.0 u = [] for i, curr_y in enumerate(D.y): # set additional arguments of the objective function # (apart from the vector of unknowns) - args = {'y': curr_y} + args = {"y": curr_y} prb.set_obj_args(args) # minimize the objective - x = prb.solve(x0=u_0.coeffs.flatten(), - ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000) + x = prb.solve( + x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000 + ) - if (prb.last_opt_result == 1 or prb.last_opt_result == 3 - or prb.last_opt_result == 4): + if ( + prb.last_opt_result == 1 + or prb.last_opt_result == 3 + or prb.last_opt_result == 4 + ): - current_err = np.linalg.norm(x-D.X[i, :])**2 + current_err = np.linalg.norm(x - D.X[i, :]) ** 2 else: current_err = math.nan @@ -82,18 +88,24 @@ def obj(x, y): u.append(x) - MSE *= 1/D.X.shape[0] + MSE *= 1 / D.X.shape[0] return MSE, u @ray.remote -def predict(individuals_str: list[str], toolbox, D: Dataset, S: SimplicialComplex, - u_0: C.CochainP0, penalty: dict) -> List: +def predict( + individuals_str: list[str], + toolbox, + D: Dataset, + S: SimplicialComplex, + u_0: C.CochainP0, + penalty: dict, +) -> List: callables = compile_individuals(toolbox, individuals_str) - u = [None]*len(individuals_str) + u = [None] * len(individuals_str) for i, ind in enumerate(callables): _, u[i] = eval_MSE_sol(ind, D, S, u_0) @@ -102,12 +114,18 @@ def predict(individuals_str: list[str], toolbox, D: Dataset, S: SimplicialComple @ray.remote -def score(individuals_str: list[str], toolbox, D: Dataset, S: SimplicialComplex, - u_0: C.CochainP0, penalty: dict) -> List: +def score( + individuals_str: list[str], + toolbox, + D: Dataset, + S: SimplicialComplex, + u_0: C.CochainP0, + penalty: dict, +) -> List: callables = compile_individuals(toolbox, individuals_str) - MSE = [None]*len(individuals_str) + MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): MSE[i], _ = eval_MSE_sol(ind, D, S, u_0) @@ -116,33 +134,39 @@ def score(individuals_str: list[str], toolbox, D: Dataset, S: SimplicialComplex, @ray.remote -def fitness(individuals_str: list[str], toolbox, D: Dataset, S: SimplicialComplex, - u_0: C.CochainP0, penalty: dict) -> Tuple[float, ]: +def fitness( + individuals_str: list[str], + toolbox, + D: Dataset, + S: SimplicialComplex, + u_0: C.CochainP0, + penalty: dict, +) -> Tuple[float,]: callables = compile_individuals(toolbox, individuals_str) indlen = get_features_batch([len], individuals_str) - fitnesses = [None]*len(individuals_str) + fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): MSE, _ = eval_MSE_sol(ind, D, S, u_0) # add penalty on length of the tree to promote simpler solutions - fitnesses[i] = (MSE + penalty["reg_param"]*indlen[i],) + fitnesses[i] = (MSE + penalty["reg_param"] * indlen[i],) return fitnesses -cases = ['poisson1d_1.yaml', 'poisson1d_2.yaml'] +cases = ["poisson1d_1.yaml", "poisson1d_2.yaml"] -@pytest.mark.parametrize('yamlfile', cases) +@pytest.mark.parametrize("yamlfile", cases) def test_poisson1d(set_test_dir, yamlfile): filename = os.path.join(os.path.dirname(__file__), yamlfile) - with open(filename) as config_file: - config_file_data = yaml.safe_load(config_file) + + regressor_params, config_file_data = util.load_config_data(filename) # generate mesh and dataset - mesh, _ = generate_line_mesh(num_nodes=11, L=1.) + mesh, _ = generate_line_mesh(num_nodes=11, L=1.0) S = build_complex_from_mesh(mesh) S.get_hodge_star() x = S.node_coords @@ -150,11 +174,11 @@ def test_poisson1d(set_test_dir, yamlfile): # generate training and test datasets # exact solution = x² - u = C.CochainP0(S, np.array(x[:, 0]**2, dtype=dctkit.float_dtype)) + u = C.CochainP0(S, np.array(x[:, 0] ** 2, dtype=dctkit.float_dtype)) # compute source term such that u solves the discrete Poisson equation # Delta u + f = 0, where Delta is the discrete Laplace-de Rham operator f = C.laplacian(u) - f.coeffs *= -1. + f.coeffs *= -1.0 X_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype) y_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype) @@ -169,18 +193,29 @@ def test_poisson1d(set_test_dir, yamlfile): pset.renameArguments(ARG0="u") pset.renameArguments(ARG1="f") + pset = util.add_primitives_to_pset_from_dict( + pset, config_file_data["gp"]["primitives"] + ) + seed_str = ["AddCP0(delP1(cobP0(u)),f)"] penalty = config_file_data["gp"]["penalty"] - common_params = {'S': S, 'u_0': u_0, 'penalty': penalty} + common_params = {"S": S, "u_0": u_0, "penalty": penalty} gpsr = gps.GPSymbolicRegressor( - pset=pset, fitness=fitness.remote, - error_metric=score.remote, predict_func=predict.remote, - config_file_data=config_file_data, print_log=True, - common_data=common_params, seed=seed_str, - plot_history=False, save_best_individual=True, - save_train_fit_history=True, output_path="./") + pset=pset, + fitness=fitness.remote, + error_metric=score.remote, + predict_func=predict.remote, + print_log=True, + common_data=common_params, + seed=seed_str, + plot_history=False, + save_best_individual=True, + save_train_fit_history=True, + output_path="./", + **regressor_params + ) train_data = Dataset("D", X_train, y_train) From 8834cd0f6ef602cde3a7df54522c0f1569372d00 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Thu, 6 Mar 2025 12:58:37 +0100 Subject: [PATCH 05/23] Adapting fit, predict and score fn args. Still working on tests. --- bench/bench.py | 2 +- examples/simple_sr.py | 23 ++++++------- examples/simple_sr_noyaml.py | 23 ++++++------- src/alpine/gp/{gpsymbreg.py => regressor.py} | 36 +++++++++++--------- tests/test_basic_sr.py | 35 +++++++++++-------- tests/test_poisson1d.py | 32 +++++++++-------- 6 files changed, 79 insertions(+), 72 deletions(-) rename src/alpine/gp/{gpsymbreg.py => regressor.py} (96%) diff --git a/bench/bench.py b/bench/bench.py index 163f1e2..b1c8d11 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -1,7 +1,7 @@ # import matplotlib.pyplot as plt from deap import gp -from alpine.gp import gpsymbreg as gps +from alpine.gp import regressor as gps from alpine.data import Dataset from alpine.gp import util import numpy as np diff --git a/examples/simple_sr.py b/examples/simple_sr.py index a802f12..2621093 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -1,6 +1,6 @@ import os from deap import gp -from alpine.gp.gpsymbreg import GPSymbolicRegressor +from alpine.gp.regressor import GPSymbolicRegressor from alpine.data import Dataset import numpy as np import ray @@ -40,44 +40,44 @@ def get_features_batch( return individ_length, nested_trigs, num_trigs -def eval_MSE_sol(individual, true_data): +def eval_MSE_sol(individual, X, y): warnings.filterwarnings("ignore") - y_pred = individual(true_data.X) - MSE = np.mean(np.square(y_pred - true_data.y)) + y_pred = individual(X) + MSE = np.mean(np.square(y_pred - y)) if np.isnan(MSE): MSE = 1e5 return MSE, y_pred @ray.remote -def predict(individuals_str, toolbox, true_data, penalty): +def predict(individuals_str, toolbox, X_test, penalty): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, true_data) + _, u[i] = eval_MSE_sol(ind, X_test, None) return u @ray.remote -def score(individuals_str, toolbox, true_data, penalty): +def score(individuals_str, toolbox, X_test, y_test, penalty): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, true_data) + MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) return MSE @ray.remote -def fitness(individuals_str, toolbox, true_data, penalty): +def fitness(individuals_str, toolbox, X_train, y_train, penalty): callables = compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -87,7 +87,7 @@ def fitness(individuals_str, toolbox, true_data, penalty): if individ_length[i] >= 50: fitnesses[i] = (1e8,) else: - MSE, _ = eval_MSE_sol(ind, true_data) + MSE, _ = eval_MSE_sol(ind, X_train, y_train) fitnesses[i] = ( MSE @@ -131,8 +131,7 @@ def main(): **regressor_params ) - train_data = Dataset("true_data", x, y) - gpsr.fit(train_data) + gpsr.fit(x, y) ray.shutdown() diff --git a/examples/simple_sr_noyaml.py b/examples/simple_sr_noyaml.py index 4b64e27..d2f5e23 100644 --- a/examples/simple_sr_noyaml.py +++ b/examples/simple_sr_noyaml.py @@ -1,5 +1,5 @@ from deap import gp -from alpine.gp.gpsymbreg import GPSymbolicRegressor +from alpine.gp.regressor import GPSymbolicRegressor from alpine.data import Dataset import numpy as np import ray @@ -39,44 +39,44 @@ def get_features_batch( return individ_length, nested_trigs, num_trigs -def eval_MSE_sol(individual, true_data): +def eval_MSE_sol(individual, X, y): warnings.filterwarnings("ignore") - y_pred = individual(true_data.X) - MSE = np.mean(np.square(y_pred - true_data.y)) + y_pred = individual(X) + MSE = np.mean(np.square(y_pred - y)) if np.isnan(MSE): MSE = 1e5 return MSE, y_pred @ray.remote -def predict(individuals_str, toolbox, true_data, penalty): +def predict(individuals_str, toolbox, X_test, penalty): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, true_data) + _, u[i] = eval_MSE_sol(ind, X_test, None) return u @ray.remote -def score(individuals_str, toolbox, true_data, penalty): +def score(individuals_str, toolbox, X_test, y_test, penalty): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, true_data) + MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) return MSE @ray.remote -def fitness(individuals_str, toolbox, true_data, penalty): +def fitness(individuals_str, toolbox, X_train, y_train, penalty): callables = compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -86,7 +86,7 @@ def fitness(individuals_str, toolbox, true_data, penalty): if individ_length[i] >= 50: fitnesses[i] = (1e8,) else: - MSE, _ = eval_MSE_sol(ind, true_data) + MSE, _ = eval_MSE_sol(ind, X_train, y_train) fitnesses[i] = ( MSE @@ -145,8 +145,7 @@ def main(): batch_size=100, ) - train_data = Dataset("true_data", x, y) - gpsr.fit(train_data) + gpsr.fit(x, y) ray.shutdown() diff --git a/src/alpine/gp/gpsymbreg.py b/src/alpine/gp/regressor.py similarity index 96% rename from src/alpine/gp/gpsymbreg.py rename to src/alpine/gp/regressor.py index bd0a376..15119bb 100644 --- a/src/alpine/gp/gpsymbreg.py +++ b/src/alpine/gp/regressor.py @@ -11,6 +11,7 @@ import ray import random from itertools import chain +from sklearn.base import BaseEstimator, RegressorMixin # reducing the number of threads launched by fitness evaluations os.environ["MKL_NUM_THREADS"] = "1" @@ -24,7 +25,7 @@ ) -class GPSymbolicRegressor: +class GPSymbolicRegressor(RegressorMixin, BaseEstimator): """Symbolic regression problem via Genetic Programming. Args: @@ -130,7 +131,7 @@ def __init__( if common_data is not None: # FIXME: does everything work when the functions do not have common args? - self.store_fit_error_common_args(common_data) + self.__store_fit_error_common_args(common_data) self.NINDIVIDUALS = NINDIVIDUALS self.NGEN = NGEN @@ -161,9 +162,6 @@ def __init__( # config individual creator and toolbox self.__creator_toolbox_config() - # self.createIndividual = individualCreator - - # self.toolbox = toolbox self.seed = seed @@ -253,7 +251,7 @@ def __creator_toolbox_config(self): self.createIndividual = createIndividual - def store_fit_error_common_args(self, data: Dict): + def __store_fit_error_common_args(self, data: Dict): """Store names and values of the arguments that are in common between the fitness and the error metric functions in the common object space. @@ -262,7 +260,7 @@ def store_fit_error_common_args(self, data: Dict): """ self.__store_shared_objects("common", data) - def store_datasets(self, datasets: Dict[str, Dataset]): + def __store_datasets(self, datasets: Dict[str, Dataset]): """Store datasets with the corresponding label ("train", "val" or "test") in the common object space. The datasets are passed as parameters to the fitness, and possibly to the error metric and the prediction functions. @@ -272,12 +270,12 @@ def store_datasets(self, datasets: Dict[str, Dataset]): the validation and the test datasets, respectively. The associated values are `Dataset` objects. """ - for dataset_label in datasets.keys(): - dataset_name_data = {datasets[dataset_label].name: datasets[dataset_label]} - self.__store_shared_objects(dataset_label, dataset_name_data) + for dataset_label, dataset_data in datasets.items(): + self.__store_shared_objects(dataset_label, dataset_data) def __store_shared_objects(self, label: str, data: Dict): for key, value in data.items(): + # replace each item of the dataset with its obj ref data[key] = ray.put(value) self.data_store[label] = data @@ -414,31 +412,35 @@ def mapper(f, individuals, toolbox_ref): toolbox_ref = ray.put(self.toolbox) self.toolbox.register("map", mapper, toolbox_ref=toolbox_ref) - def fit(self, train_data: Dataset, val_data: Dataset | None = None): + def fit(self, X_train, y_train=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" - if self.validate and val_data is not None: + train_data = {"X_train": X_train, "y_train": y_train} + if self.validate and X_val is not None: + val_data = {"X_val": X_val, "y_val": y_val} datasets = {"train": train_data, "val": val_data} else: datasets = {"train": train_data} - self.store_datasets(datasets) + self.__store_datasets(datasets) self.__register_fitness_func() if self.validate and self.error_metric is not None: self.__register_val_funcs() self.__run() - def predict(self, test_data: Dataset): + def predict(self, X_test): + test_data = {"X_test": X_test} datasets = {"test": test_data} - self.store_datasets(datasets) + self.__store_datasets(datasets) self.__register_predict_func() u_best = self.toolbox.map(self.toolbox.evaluate_test_sols, (self.best,))[0] return u_best - def score(self, test_data: Dataset): + def score(self, X_test, y_test): """Computes the error metric (passed to the `GPSymbolicRegressor` constructor) on a given dataset. """ + test_data = {"X_test": X_test, "y_test": y_test} datasets = {"test": test_data} - self.store_datasets(datasets) + self.__store_datasets(datasets) self.__register_score_func() score = self.toolbox.map(self.toolbox.evaluate_test_score, (self.best,))[0] return score diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 97b5073..6a54a6c 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -1,7 +1,7 @@ import os from dctkit import config from deap import gp -from alpine.gp.gpsymbreg import GPSymbolicRegressor +from alpine.gp.regressor import GPSymbolicRegressor from alpine.data import Dataset from alpine.gp import util import jax.numpy as jnp @@ -21,53 +21,56 @@ def compile_individuals(toolbox, individuals_str_batch): y = x**4 + x**3 + x**2 + x -def eval_MSE_sol(individual, true_data): +def eval_MSE_sol(individual, X, y): import os os.environ["JAX_PLATFORMS"] = "cpu" config() # Evaluate the mean squared error between the expression # and the real function : x**4 + x**3 + x**2 + x - y_pred = individual(true_data.X) - MSE = jnp.sum(jnp.square(y_pred - true_data.y)) / len(true_data.X) - if jnp.isnan(MSE): - MSE = 1e5 + y_pred = individual(X) + MSE = None + + if y is not None: + MSE = jnp.mean(jnp.sum(jnp.square(y_pred - y))) + MSE = jnp.nan_to_num(MSE, nan=1e5) + return MSE, y_pred @ray.remote -def predict(individuals_str, toolbox, true_data): +def predict(individuals_str, toolbox, X_test): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, true_data) + _, u[i] = eval_MSE_sol(ind, X_test, None) return u @ray.remote -def score(individuals_str, toolbox, true_data): +def score(individuals_str, toolbox, X_test, y_test): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, true_data) + MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) return MSE @ray.remote -def fitness(individuals_str, toolbox, true_data): +def fitness(individuals_str, toolbox, X_train, y_train): callables = compile_individuals(toolbox, individuals_str) fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE, _ = eval_MSE_sol(ind, true_data) + MSE, _ = eval_MSE_sol(ind, X_train, y_train) fitnesses[i] = (MSE,) @@ -110,10 +113,12 @@ def test_basic_sr(set_test_dir): **regressor_params ) - train_data = Dataset("true_data", x, y) - gpsr.fit(train_data) + # train_data = Dataset("true_data", x, y) + gpsr.fit(x, y) + + fit_score = gpsr.score(x, y) - fit_score = gpsr.score(train_data) + y_pred = gpsr.predict(x) ray.shutdown() diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 287bd05..48f2e7c 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -3,7 +3,7 @@ from dctkit.mesh.util import generate_line_mesh, build_complex_from_mesh from dctkit.math.opt import optctrl as oc from deap import gp -from alpine.gp import gpsymbreg as gps +from alpine.gp import regressor as gps from alpine.data import Dataset from dctkit import config import dctkit @@ -34,10 +34,10 @@ def get_features_batch(individ_feature_extractors, individuals_str_batch): def eval_MSE_sol( - residual: Callable, D: Dataset, S: SimplicialComplex, u_0: C.CochainP0 + residual: Callable, X, y, S: SimplicialComplex, u_0: C.CochainP0 ) -> float: - num_nodes = D.X.shape[1] + num_nodes = X.shape[1] # need to call config again before using JAX in energy evaluations to make sure that # the current worker has initialized JAX @@ -59,7 +59,7 @@ def obj(x, y): u = [] - for i, curr_y in enumerate(D.y): + for i, curr_y in enumerate(y): # set additional arguments of the objective function # (apart from the vector of unknowns) args = {"y": curr_y} @@ -76,7 +76,7 @@ def obj(x, y): or prb.last_opt_result == 4 ): - current_err = np.linalg.norm(x - D.X[i, :]) ** 2 + current_err = np.linalg.norm(x - X[i, :]) ** 2 else: current_err = math.nan @@ -88,7 +88,7 @@ def obj(x, y): u.append(x) - MSE *= 1 / D.X.shape[0] + MSE *= 1 / X.shape[0] return MSE, u @@ -97,7 +97,7 @@ def obj(x, y): def predict( individuals_str: list[str], toolbox, - D: Dataset, + X_test, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -108,7 +108,7 @@ def predict( u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, D, S, u_0) + _, u[i] = eval_MSE_sol(ind, X_test, None, S, u_0) return u @@ -117,7 +117,8 @@ def predict( def score( individuals_str: list[str], toolbox, - D: Dataset, + X_test, + y_test, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -128,7 +129,7 @@ def score( MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, D, S, u_0) + MSE[i], _ = eval_MSE_sol(ind, X_test, y_test, S, u_0) return MSE @@ -137,7 +138,8 @@ def score( def fitness( individuals_str: list[str], toolbox, - D: Dataset, + X_train, + y_train, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -148,7 +150,7 @@ def fitness( fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE, _ = eval_MSE_sol(ind, D, S, u_0) + MSE, _ = eval_MSE_sol(ind, X_train, y_train, S, u_0) # add penalty on length of the tree to promote simpler solutions fitnesses[i] = (MSE + penalty["reg_param"] * indlen[i],) @@ -219,11 +221,11 @@ def test_poisson1d(set_test_dir, yamlfile): train_data = Dataset("D", X_train, y_train) - gpsr.fit(train_data, val_data=train_data) + gpsr.fit(X_train, y_train, X_val=X_train, y_val=y_train) - u_best = gpsr.predict(train_data) + u_best = gpsr.predict(X_train) - fit_score = gpsr.score(train_data) + fit_score = gpsr.score(X_train, y_train) gpsr.save_best_test_sols(train_data, "./") From 118a52a30de710867f5ec55b272d0c81f8672fc2 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Fri, 7 Mar 2025 14:02:49 +0100 Subject: [PATCH 06/23] Updated fit, score, predict argument names. --- environment.yaml | 1 + src/alpine/gp/regressor.py | 8 ++--- tests/test_basic_sr.py | 12 +++---- tests/test_poisson1d.py | 66 ++++++++++++++++++++------------------ 4 files changed, 45 insertions(+), 42 deletions(-) diff --git a/environment.yaml b/environment.yaml index c77f8b0..4e938ca 100644 --- a/environment.yaml +++ b/environment.yaml @@ -27,3 +27,4 @@ dependencies: - tox - mygrad - tox-conda + - pmlb diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 15119bb..b45f60b 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -414,9 +414,9 @@ def mapper(f, individuals, toolbox_ref): def fit(self, X_train, y_train=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" - train_data = {"X_train": X_train, "y_train": y_train} + train_data = {"X": X_train, "y": y_train} if self.validate and X_val is not None: - val_data = {"X_val": X_val, "y_val": y_val} + val_data = {"X": X_val, "y": y_val} datasets = {"train": train_data, "val": val_data} else: datasets = {"train": train_data} @@ -427,7 +427,7 @@ def fit(self, X_train, y_train=None, X_val=None, y_val=None): self.__run() def predict(self, X_test): - test_data = {"X_test": X_test} + test_data = {"X": X_test} datasets = {"test": test_data} self.__store_datasets(datasets) self.__register_predict_func() @@ -438,7 +438,7 @@ def score(self, X_test, y_test): """Computes the error metric (passed to the `GPSymbolicRegressor` constructor) on a given dataset. """ - test_data = {"X_test": X_test, "y_test": y_test} + test_data = {"X": X_test, "y": y_test} datasets = {"test": test_data} self.__store_datasets(datasets) self.__register_score_func() diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 6a54a6c..6dcbd72 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -39,38 +39,38 @@ def eval_MSE_sol(individual, X, y): @ray.remote -def predict(individuals_str, toolbox, X_test): +def predict(individuals_str, toolbox, X): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, X_test, None) + _, u[i] = eval_MSE_sol(ind, X, None) return u @ray.remote -def score(individuals_str, toolbox, X_test, y_test): +def score(individuals_str, toolbox, X, y): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) + MSE[i], _ = eval_MSE_sol(ind, X, y) return MSE @ray.remote -def fitness(individuals_str, toolbox, X_train, y_train): +def fitness(individuals_str, toolbox, X, y): callables = compile_individuals(toolbox, individuals_str) fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE, _ = eval_MSE_sol(ind, X_train, y_train) + MSE, _ = eval_MSE_sol(ind, X, y) fitnesses[i] = (MSE,) diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 48f2e7c..515c37a 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -37,7 +37,7 @@ def eval_MSE_sol( residual: Callable, X, y, S: SimplicialComplex, u_0: C.CochainP0 ) -> float: - num_nodes = X.shape[1] + num_nodes = S.num_nodes # need to call config again before using JAX in energy evaluations to make sure that # the current worker has initialized JAX @@ -57,47 +57,48 @@ def obj(x, y): MSE = 0.0 - u = [] + us = [] - for i, curr_y in enumerate(y): + for i, curr_force in enumerate(X): # set additional arguments of the objective function # (apart from the vector of unknowns) - args = {"y": curr_y} + args = {"y": curr_force} prb.set_obj_args(args) # minimize the objective - x = prb.solve( + u = prb.solve( x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000 ) - if ( - prb.last_opt_result == 1 - or prb.last_opt_result == 3 - or prb.last_opt_result == 4 - ): + if y is not None: + if ( + prb.last_opt_result == 1 + or prb.last_opt_result == 3 + or prb.last_opt_result == 4 + ): - current_err = np.linalg.norm(x - X[i, :]) ** 2 - else: - current_err = math.nan + current_err = np.linalg.norm(u - y[i, :]) ** 2 + else: + current_err = math.nan - if math.isnan(current_err): - MSE = 1e5 - break + if math.isnan(current_err): + MSE = 1e5 + break - MSE += current_err + MSE += current_err - u.append(x) + us.append(u) - MSE *= 1 / X.shape[0] + MSE *= 1 / num_nodes - return MSE, u + return MSE, us @ray.remote def predict( individuals_str: list[str], toolbox, - X_test, + X, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -108,7 +109,7 @@ def predict( u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, X_test, None, S, u_0) + _, u[i] = eval_MSE_sol(ind, X, None, S, u_0) return u @@ -117,8 +118,8 @@ def predict( def score( individuals_str: list[str], toolbox, - X_test, - y_test, + X, + y, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -129,7 +130,7 @@ def score( MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, X_test, y_test, S, u_0) + MSE[i], _ = eval_MSE_sol(ind, X, y, S, u_0) return MSE @@ -138,8 +139,8 @@ def score( def fitness( individuals_str: list[str], toolbox, - X_train, - y_train, + X, + y, S: SimplicialComplex, u_0: C.CochainP0, penalty: dict, @@ -150,7 +151,7 @@ def fitness( fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE, _ = eval_MSE_sol(ind, X_train, y_train, S, u_0) + MSE, _ = eval_MSE_sol(ind, X, y, S, u_0) # add penalty on length of the tree to promote simpler solutions fitnesses[i] = (MSE + penalty["reg_param"] * indlen[i],) @@ -181,8 +182,9 @@ def test_poisson1d(set_test_dir, yamlfile): # Delta u + f = 0, where Delta is the discrete Laplace-de Rham operator f = C.laplacian(u) f.coeffs *= -1.0 - X_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype) - y_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype) + + X_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype) + y_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype) # initial guess for the unknown of the Poisson problem (cochain of nodals values) u_0_vec = np.zeros(num_nodes, dtype=dctkit.float_dtype) @@ -219,7 +221,7 @@ def test_poisson1d(set_test_dir, yamlfile): **regressor_params ) - train_data = Dataset("D", X_train, y_train) + # train_data = Dataset("D", X_train, y_train) gpsr.fit(X_train, y_train, X_val=X_train, y_val=y_train) @@ -227,7 +229,7 @@ def test_poisson1d(set_test_dir, yamlfile): fit_score = gpsr.score(X_train, y_train) - gpsr.save_best_test_sols(train_data, "./") + # gpsr.save_best_test_sols(train_data, "./") ray.shutdown() assert np.allclose(u.coeffs.flatten(), np.ravel(u_best)) From 2574a0800c6c7f9530417a1023f40ac3695f621e Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Fri, 7 Mar 2025 15:20:49 +0100 Subject: [PATCH 07/23] Updated README. --- README.md | 112 ++++++++-------------------- bench/results/process_results.ipynb | 2 +- examples/simple_sr.py | 13 ++-- examples/simple_sr_noyaml.py | 13 ++-- src/alpine/gp/regressor.py | 4 +- tests/test_basic_sr.py | 4 - tests/test_poisson1d.py | 3 +- 7 files changed, 49 insertions(+), 102 deletions(-) diff --git a/README.md b/README.md index 09f1bc8..0f46578 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ _AlpineGP_ is a Python library for **symbolic regression** via _Genetic Programm It provides a high-level interface to the [`DEAP`](https://github.com/alucantonio/DEAP) library, including distributed computing functionalities. -Besides solving classical symbolic regression problems involving algebraic equations +Besides solving classical symbolic regression problems involving _algebraic equations_ (see, for example, the benchmark problems contained in the [SRBench](https://github.com/cavalab/srbench) repository), _AlpineGP_ is specifically -design to help identifying _symbolic_ models of _physical systems_ governed by **field equations**. +designed to help identifying _symbolic_ models of _physical systems_ governed by **field equations**. To this aim, it allows to exploit the **discrete calculus** framework defined and implemented in the library [`dctkit`](https://github.com/alucantonio/dctkit) as a natural and effective language to express physical models (i.e., conservation laws). @@ -24,7 +24,7 @@ elastica). Scripts to reproduce these benchmarks can be found [here](https://git - scikit-learn compatible interface; - hyperparameter configuration via YAML files; - support for custom operators (with/without strong-typing); -- benchmark suite (Nguyen and interface to SRBench) +- benchmark suite (Nguyen and SRBench) ## Installation @@ -67,100 +67,55 @@ $ ./bench.sh ``` Then process the results using the `process_results` notebook. -Results on [PMLB](https://epistasislab.github.io/pmlb/) datasets (average $R^2$ over 10 -test sets, no Friedman): - -| dataset | mean | median | std | -|:------------------------------|-----------:|-----------:|-----------:| -| 527_analcatdata_election2000 | 0.997727 | 0.999273 | 0.00357541 | -| 663_rabe_266 | 0.994945 | 0.995115 | 0.00134602 | -| 560_bodyfat | 0.988467 | 0.992938 | 0.0121634 | -| 505_tecator | 0.986861 | 0.986026 | 0.0039009 | -| 561_cpu | 0.957349 | 0.967161 | 0.0330056 | -| 690_visualizing_galaxy | 0.963404 | 0.964137 | 0.00867664 | -| 197_cpu_act | 0.94309 | 0.945666 | 0.00966613 | -| 227_cpu_small | 0.946096 | 0.945094 | 0.00812824 | -| 523_analcatdata_neavote | 0.936577 | 0.943564 | 0.0278365 | -| 1096_FacultySalaries | 0.662191 | 0.894004 | 0.525012 | -| 557_analcatdata_apnea1 | 0.881416 | 0.889496 | 0.0397044 | -| 230_machine_cpu | 0.778943 | 0.879675 | 0.273846 | -| 556_analcatdata_apnea2 | 0.863157 | 0.867148 | 0.0347729 | -| 1027_ESL | 0.858838 | 0.860647 | 0.0127587 | -| 695_chatfield_4 | 0.827457 | 0.830825 | 0.0677194 | -| 229_pwLinear | 0.810944 | 0.811717 | 0.0453826 | -| 210_cloud | 0.761678 | 0.786611 | 0.159399 | -| 529_pollen | 0.787219 | 0.782358 | 0.0118861 | -| 1089_USCrime | 0.739218 | 0.756442 | 0.117112 | -| 503_wind | 0.747271 | 0.745787 | 0.0088297 | -| 712_chscase_geyser1 | 0.751443 | 0.745605 | 0.0549794 | -| 519_vinnie | 0.728873 | 0.719948 | 0.0377254 | -| 228_elusage | 0.621403 | 0.714127 | 0.216677 | -| 659_sleuth_ex1714 | 0.562146 | 0.702428 | 0.309503 | -| 666_rmftsa_ladata | 0.679718 | 0.672306 | 0.0620477 | -| 225_puma8NH | 0.66854 | 0.667771 | 0.0127414 | -| 706_sleuth_case1202 | 0.418764 | 0.568134 | 0.43742 | -| 1029_LEV | 0.557169 | 0.560547 | 0.0330229 | -| 547_no2 | 0.50562 | 0.502983 | 0.0920748 | -| 485_analcatdata_vehicle | 0.244083 | 0.47083 | 0.702171 | -| 192_vineyard | 0.381856 | 0.38018 | 0.200867 | -| 1030_ERA | 0.373955 | 0.373216 | 0.0453621 | -| 1028_SWD | 0.335559 | 0.343532 | 0.0556771 | -| 542_pollution | 0.170091 | 0.279329 | 0.254557 | -| 665_sleuth_case2002 | 0.242165 | 0.25769 | 0.146767 | -| 522_pm10 | 0.235107 | 0.233109 | 0.0445476 | -| 678_visualizing_environmental | 0.0604016 | 0.193514 | 0.358373 | -| 687_sleuth_ex1605 | -0.0707247 | -0.0740387 | 0.372597 | - -**Median test $R^2$: 0.7683**. - ## Usage Setting up a symbolic regression problem in _AlpineGP_ involves several key steps: 1. Define the function that computes the prediction associated to an _individual_ (model expression tree). Its arguments may be a _function_ obtained by parsing the -individual tree and possibly other parameters, such as the dataset needed to evaluate -the model. It returns both an _error metric_ between the prediction and the data and -the prediction itself. +individual tree and possibly other parameters, such as the features (`X`) needed to evaluate +the model. It returns both the error between the predictions and the labels (`y`) and +the predictions themselves. ```python -def eval_MSE_sol(individual, dataset): +def eval_MSE_sol(individual, X, y): # ... return MSE, prediction ``` -1. Define the functions that return the **prediction** and the **fitness** - associated to an individual. These functions **must** have the same - arguments. In particular: - - the first argument is **always** the batch of trees to be evaluated by the - current worker; - - the second argument **must** be the `toolbox` object used to compile the - individual trees into callable functions; - - the third argument **must** be the dataset needed for the evaluation of the - individuals. +2. Define the functions that return the **prediction** and the **fitness** + associated to an individual. These functions **must** have at least the following + arguments in the first three positions: + - the list of trees to be evaluated by the current worker; + - the `toolbox` object used to compile the individual trees into callable functions; + - the dataset features needed for the evaluation of the individuals. The name of the argument **must** be `X`. + Additionally, the fourth argument of the **fitness** function **must** be the dataset + labels, called `y`. For unsupervised problems, `None` can be passed for the labels to the `fit` + method of the regressor. Both functions **must** be decorated with `ray.remote` to support - distributed evaluation (multiprocessing). + distributed evaluation (multiprocessing). Any additional arguments can be set using + the `common_data` argument of the `GPSymbolicRegressor` object (see below). ```python @ray.remote -def predict(trees, toolbox, data): +def predict(trees, toolbox, X): callables = compile_individuals(toolbox, trees) preds = [None]*len(trees) for i, ind in enumerate(callables): - _, preds[i] = eval_MSE_sol(ind, data) + _, preds[i] = eval_MSE_sol(ind, X, None) return preds @ray.remote -def fitness(trees, toolbox, true_data): +def fitness(trees, toolbox, X, y): callables = compile_individuals(toolbox, trees) fitnesses = [None]*len(trees) for i, ind in enumerate(callables): - MSE, _ = eval_MSE_sol(ind, data) + MSE, _ = eval_MSE_sol(ind, X, y) # each fitness MUST be a tuple (required by DEAP) fitnesses[i] = (MSE,) @@ -168,11 +123,15 @@ def fitness(trees, toolbox, true_data): return fitnesses ``` -3. Set and solve the symbolic regression problem. +3. Set up and solve the symbolic regression problem. The configuration of the + `GPSymbolicRegressor` object can be specified via the arguments of its constructor + (see the API docs), or loaded from a YAML file. ```python -# read parameters from YAML file -with open("ex1.yaml") as config_file: - config_file_data = yaml.safe_load(config_file) +# read config parameters from YAML file +yamlfile = "ex1.yaml" +filename = os.path.join(os.path.dirname(__file__), yamlfile) + +regressor_params, config_file_data = util.load_config_data(filename) # ... # ... @@ -192,18 +151,13 @@ common_params = {'penalty': penalty} gpsr = gps.GPSymbolicRegressor(pset=pset, fitness=fitness.remote, predict_func=predict.remote, common_data=common_params, print_log=True, - config_file_data=config_file_data) - -# wrap tensors corresponding to train and test data into Dataset objects (to be passed to -# fit and predict methods) -train_data = Dataset("D", X_train, y_train) -test_data = Dataset("D", X_test, y_test) + **regressor_params) # solve the symbolic regression problem -gpsr.fit(train_data) +gpsr.fit(X_train, y_train) # compute the prediction on the test dataset given by the best model found during the SR -pred_test = gpsr.predict(test_data) +pred_test = gpsr.predict(X_test) ``` A complete example notebook can be found in the `examples` directory. Also check the diff --git a/bench/results/process_results.ipynb b/bench/results/process_results.ipynb index bea3d37..1c1f5ab 100644 --- a/bench/results/process_results.ipynb +++ b/bench/results/process_results.ipynb @@ -374,7 +374,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.5" } }, "nbformat": 4, diff --git a/examples/simple_sr.py b/examples/simple_sr.py index 2621093..ecfc338 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -1,7 +1,6 @@ import os from deap import gp from alpine.gp.regressor import GPSymbolicRegressor -from alpine.data import Dataset import numpy as np import ray import warnings @@ -51,33 +50,33 @@ def eval_MSE_sol(individual, X, y): @ray.remote -def predict(individuals_str, toolbox, X_test, penalty): +def predict(individuals_str, toolbox, X, penalty): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, X_test, None) + _, u[i] = eval_MSE_sol(ind, X, None) return u @ray.remote -def score(individuals_str, toolbox, X_test, y_test, penalty): +def score(individuals_str, toolbox, X, y, penalty): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) + MSE[i], _ = eval_MSE_sol(ind, X, y) return MSE @ray.remote -def fitness(individuals_str, toolbox, X_train, y_train, penalty): +def fitness(individuals_str, toolbox, X, y, penalty): callables = compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -87,7 +86,7 @@ def fitness(individuals_str, toolbox, X_train, y_train, penalty): if individ_length[i] >= 50: fitnesses[i] = (1e8,) else: - MSE, _ = eval_MSE_sol(ind, X_train, y_train) + MSE, _ = eval_MSE_sol(ind, X, y) fitnesses[i] = ( MSE diff --git a/examples/simple_sr_noyaml.py b/examples/simple_sr_noyaml.py index d2f5e23..32f8460 100644 --- a/examples/simple_sr_noyaml.py +++ b/examples/simple_sr_noyaml.py @@ -1,6 +1,5 @@ from deap import gp from alpine.gp.regressor import GPSymbolicRegressor -from alpine.data import Dataset import numpy as np import ray import warnings @@ -50,33 +49,33 @@ def eval_MSE_sol(individual, X, y): @ray.remote -def predict(individuals_str, toolbox, X_test, penalty): +def predict(individuals_str, toolbox, X, penalty): callables = compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) for i, ind in enumerate(callables): - _, u[i] = eval_MSE_sol(ind, X_test, None) + _, u[i] = eval_MSE_sol(ind, X, None) return u @ray.remote -def score(individuals_str, toolbox, X_test, y_test, penalty): +def score(individuals_str, toolbox, X, y, penalty): callables = compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) for i, ind in enumerate(callables): - MSE[i], _ = eval_MSE_sol(ind, X_test, y_test) + MSE[i], _ = eval_MSE_sol(ind, X, y) return MSE @ray.remote -def fitness(individuals_str, toolbox, X_train, y_train, penalty): +def fitness(individuals_str, toolbox, X, y, penalty): callables = compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -86,7 +85,7 @@ def fitness(individuals_str, toolbox, X_train, y_train, penalty): if individ_length[i] >= 50: fitnesses[i] = (1e8,) else: - MSE, _ = eval_MSE_sol(ind, X_train, y_train) + MSE, _ = eval_MSE_sol(ind, X, y) fitnesses[i] = ( MSE diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index b45f60b..2442b82 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -738,7 +738,7 @@ def save_train_fit_history(self, output_path: str): if self.validate: np.save(join(output_path, "val_fit_history.npy"), self.val_fit_history) - def save_best_test_sols(self, test_data: Dataset, output_path: str): + def save_best_test_sols(self, X_test, output_path: str): """Compute and save the predictions corresponding to the best individual at the end of the evolution, evaluated over the test dataset. @@ -747,7 +747,7 @@ def save_best_test_sols(self, test_data: Dataset, output_path: str): output_path: path where the predictions should be saved (one .npy file for each sample in the test dataset). """ - best_test_sols = self.predict(test_data) + best_test_sols = self.predict(X_test) for i, sol in enumerate(best_test_sols): np.save(join(output_path, "best_sol_test_" + str(i) + ".npy"), sol) diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 6dcbd72..6688a01 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -2,7 +2,6 @@ from dctkit import config from deap import gp from alpine.gp.regressor import GPSymbolicRegressor -from alpine.data import Dataset from alpine.gp import util import jax.numpy as jnp import ray @@ -113,13 +112,10 @@ def test_basic_sr(set_test_dir): **regressor_params ) - # train_data = Dataset("true_data", x, y) gpsr.fit(x, y) fit_score = gpsr.score(x, y) - y_pred = gpsr.predict(x) - ray.shutdown() assert fit_score <= 1e-12 diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 515c37a..96da59a 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -4,7 +4,6 @@ from dctkit.math.opt import optctrl as oc from deap import gp from alpine.gp import regressor as gps -from alpine.data import Dataset from dctkit import config import dctkit import numpy as np @@ -229,7 +228,7 @@ def test_poisson1d(set_test_dir, yamlfile): fit_score = gpsr.score(X_train, y_train) - # gpsr.save_best_test_sols(train_data, "./") + gpsr.save_best_test_sols(X_train, "./") ray.shutdown() assert np.allclose(u.coeffs.flatten(), np.ravel(u_best)) From eea250f218313b3814bc3223430900d944dbbf9e Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Fri, 7 Mar 2025 15:25:31 +0100 Subject: [PATCH 08/23] Minor changes to actions. --- .github/workflows/tests.yml | 2 +- README.md | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 91b8542..a463ddd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10'] + python-version: ['3.12'] steps: - uses: actions/checkout@v3 diff --git a/README.md b/README.md index 0f46578..b8f8c37 100644 --- a/README.md +++ b/README.md @@ -88,13 +88,14 @@ def eval_MSE_sol(individual, X, y): arguments in the first three positions: - the list of trees to be evaluated by the current worker; - the `toolbox` object used to compile the individual trees into callable functions; - - the dataset features needed for the evaluation of the individuals. The name of the argument **must** be `X`. - Additionally, the fourth argument of the **fitness** function **must** be the dataset - labels, called `y`. For unsupervised problems, `None` can be passed for the labels to the `fit` - method of the regressor. - Both functions **must** be decorated with `ray.remote` to support - distributed evaluation (multiprocessing). Any additional arguments can be set using - the `common_data` argument of the `GPSymbolicRegressor` object (see below). + - the dataset features needed for the evaluation of the individuals. The name of the + argument **must** be `X`. + +Additionally, the fourth argument of the **fitness** function **must** be the dataset +labels, called `y`. For unsupervised problems, `None` can be passed for the labels to +the `fit` method of the regressor. Both functions **must** be decorated with `ray.remote` to support +distributed evaluation (multiprocessing). Any additional arguments can be set using +the `common_data` argument of the `GPSymbolicRegressor` object (see below). ```python @ray.remote def predict(trees, toolbox, X): From 7d904c38c8e92358c6b44a1f6cac2a7b19b29b18 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Fri, 7 Mar 2025 15:31:58 +0100 Subject: [PATCH 09/23] Trying to add setuptools into github action. --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a463ddd..ac3b3f6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,6 +24,7 @@ jobs: conda config --set solver libmamba python -m pip install tox-conda python -m pip install flake8 + python -m pip install setuptools - name: Linting with flake8 run: | flake8 From 818fdde40cc9132e9b58aca6a8550dd39fff8fad Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 10 Mar 2025 07:47:10 +0100 Subject: [PATCH 10/23] Updated benchmark results (no seed in deap). --- bench/bench.py | 59 +- bench/results/1027_ESL.csv | 18 +- bench/results/1028_SWD.csv | 20 +- bench/results/1029_LEV.csv | 20 +- bench/results/1030_ERA.csv | 20 +- bench/results/1089_USCrime.csv | 20 +- bench/results/1096_FacultySalaries.csv | 20 +- bench/results/192_vineyard.csv | 20 +- bench/results/197_cpu_act.csv | 20 +- bench/results/210_cloud.csv | 20 +- bench/results/225_puma8NH.csv | 20 +- bench/results/227_cpu_small.csv | 20 +- bench/results/228_elusage.csv | 18 +- bench/results/229_pwLinear.csv | 20 +- bench/results/230_machine_cpu.csv | 20 +- bench/results/485_analcatdata_vehicle.csv | 20 +- bench/results/503_wind.csv | 20 +- bench/results/505_tecator.csv | 20 +- bench/results/519_vinnie.csv | 20 +- bench/results/522_pm10.csv | 20 +- bench/results/523_analcatdata_neavote.csv | 14 +- .../results/527_analcatdata_election2000.csv | 18 +- bench/results/529_pollen.csv | 18 +- bench/results/542_pollution.csv | 20 +- bench/results/547_no2.csv | 20 +- bench/results/556_analcatdata_apnea2.csv | 20 +- bench/results/557_analcatdata_apnea1.csv | 20 +- bench/results/560_bodyfat.csv | 20 +- bench/results/561_cpu.csv | 20 +- bench/results/659_sleuth_ex1714.csv | 20 +- bench/results/663_rabe_266.csv | 20 +- bench/results/665_sleuth_case2002.csv | 20 +- bench/results/666_rmftsa_ladata.csv | 20 +- .../results/678_visualizing_environmental.csv | 20 +- bench/results/687_sleuth_ex1605.csv | 20 +- bench/results/690_visualizing_galaxy.csv | 20 +- bench/results/695_chatfield_4.csv | 20 +- bench/results/706_sleuth_case1202.csv | 20 +- bench/results/712_chscase_geyser1.csv | 20 +- bench/results/alpinegp-blackbox_results.csv | 746 +++++++++--------- bench/results/process_results.ipynb | 264 +++---- 41 files changed, 896 insertions(+), 919 deletions(-) diff --git a/bench/bench.py b/bench/bench.py index b1c8d11..2fc23b5 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -2,11 +2,9 @@ from deap import gp from alpine.gp import regressor as gps -from alpine.data import Dataset from alpine.gp import util import numpy as np import ray -import yaml import time @@ -33,15 +31,15 @@ def check_nested_trig_fn(ind): return util.detect_nested_trigonometric_functions(str(ind)) -def eval_model(individual, D, consts=[]): +def eval_model(individual, X, consts=[]): warnings.filterwarnings("ignore") - y_pred = individual(*D.X, consts) + y_pred = individual(*X, consts) return y_pred -def compute_MSE(individual, D, consts=[]): - y_pred = eval_model(individual, D, consts) - MSE = np.mean((D.y - y_pred) ** 2) +def compute_MSE(individual, X, y, consts=[]): + y_pred = eval_model(individual, X, consts) + MSE = np.mean((y - y_pred) ** 2) if np.isnan(MSE) or np.isinf(MSE): MSE = 1e8 @@ -66,7 +64,7 @@ def compile_individual_with_consts(tree, toolbox, special_term_name="a"): return individual, const_idx -def eval_MSE_and_tune_constants(tree, toolbox, D): +def eval_MSE_and_tune_constants(tree, toolbox, X, y): individual, num_consts = compile_individual_with_consts(tree, toolbox) if num_consts > 0: @@ -75,8 +73,8 @@ def eval_MSE_and_tune_constants(tree, toolbox, D): # outside? def eval_MSE(consts): warnings.filterwarnings("ignore") - y_pred = individual(*D.X, consts) - total_err = np.mean((D.y - y_pred) ** 2) + y_pred = individual(*X, consts) + total_err = np.mean((y - y_pred) ** 2) return total_err @@ -113,7 +111,7 @@ def get_bounds(self): if np.isinf(MSE) or np.isnan(MSE): MSE = 1e8 else: - MSE = compute_MSE(individual, D) + MSE = compute_MSE(individual, X, y) consts = [] return MSE, consts @@ -133,31 +131,31 @@ def get_features_batch( @ray.remote(num_cpus=num_cpus) -def predict(individuals_str_batch, toolbox, dataset, penalty, fitness_scale): +def predict(individuals_str_batch, toolbox, X, penalty, fitness_scale): predictions = [None] * len(individuals_str_batch) for i, tree in enumerate(individuals_str_batch): callable, _ = compile_individual_with_consts(tree, toolbox) - predictions[i] = eval_model(callable, dataset, consts=tree.consts) + predictions[i] = eval_model(callable, X, consts=tree.consts) return predictions @ray.remote(num_cpus=num_cpus) -def compute_MSEs(individuals_str_batch, toolbox, dataset, penalty, fitness_scale): +def compute_MSEs(individuals_str_batch, toolbox, X, y, penalty, fitness_scale): total_errs = [None] * len(individuals_str_batch) for i, tree in enumerate(individuals_str_batch): callable, _ = compile_individual_with_consts(tree, toolbox) - total_errs[i] = compute_MSE(callable, dataset, consts=tree.consts) + total_errs[i] = compute_MSE(callable, X, y, consts=tree.consts) return total_errs @ray.remote(num_cpus=num_cpus) -def compute_attributes(individuals_str_batch, toolbox, dataset, penalty, fitness_scale): +def compute_attributes(individuals_str_batch, toolbox, X, y, penalty, fitness_scale): attributes = [None] * len(individuals_str_batch) @@ -170,7 +168,7 @@ def compute_attributes(individuals_str_batch, toolbox, dataset, penalty, fitness consts = None fitness = (1e8,) else: - MSE, consts = eval_MSE_and_tune_constants(tree, toolbox, dataset) + MSE, consts = eval_MSE_and_tune_constants(tree, toolbox, X, y) fitness = ( fitness_scale * ( @@ -192,8 +190,7 @@ def assign_attributes(individuals, attributes): def eval(problem, cfgfile, seed=42): - with open(cfgfile) as config_file: - config_file_data = yaml.safe_load(config_file) + regressor_params, config_file_data = util.load_config_data(cfgfile) scaleXy = config_file_data["gp"]["scaleXy"] @@ -219,6 +216,10 @@ def eval(problem, cfgfile, seed=42): else: pset = gp.PrimitiveSetTyped("Main", [float] * num_variables, float) + pset = util.add_primitives_to_pset_from_dict( + pset, config_file_data["gp"]["primitives"] + ) + batch_size = config_file_data["gp"]["batch_size"] if config_file_data["gp"]["use_constants"]: pset.addTerminal(object, float, "a") @@ -244,25 +245,25 @@ def eval(problem, cfgfile, seed=42): callback_func=callback_func, print_log=False, num_best_inds_str=1, - config_file_data=config_file_data, save_best_individual=False, output_path="./", seed=None, batch_size=batch_size, + **regressor_params, ) - train_data = Dataset("dataset", X_train_scaled, y_train_scaled) - test_data = Dataset("dataset", X_test_scaled, y_test) + # train_data = Dataset("dataset", X_train_scaled, y_train_scaled) + # test_data = Dataset("dataset", X_test_scaled, y_test) if num_variables > 1: - train_data.X = [train_data.X[:, i] for i in range(num_variables)] - test_data.X = [test_data.X[:, i] for i in range(num_variables)] + X_train = [X_train_scaled[:, i] for i in range(num_variables)] + X_test = [X_test_scaled[:, i] for i in range(num_variables)] else: - train_data.X = [train_data.X] - test_data.X = [test_data.X] + X_train = [X_train_scaled] + X_test = [X_test_scaled] tic = time.time() - gpsr.fit(train_data) + gpsr.fit(X_train, y_train_scaled) toc = time.time() if hasattr(gpsr.best, "consts"): @@ -274,7 +275,7 @@ def eval(problem, cfgfile, seed=42): ) print("Individuals per sec = ", individuals_per_sec) - u_best = gpsr.predict(test_data) + u_best = gpsr.predict(X_test) # print(u_best) # print(y_test) @@ -292,7 +293,7 @@ def eval(problem, cfgfile, seed=42): print("MSE on the test set = ", MSE) print("R^2 on the test set = ", r2_test) - pred_train = gpsr.predict(train_data) + pred_train = gpsr.predict(X_train) if scaleXy: pred_train = scaler_y.inverse_transform(pred_train.reshape(-1, 1)).flatten() diff --git a/bench/results/1027_ESL.csv b/bench/results/1027_ESL.csv index 5090a07..9fe7a3e 100644 --- a/bench/results/1027_ESL.csv +++ b/bench/results/1027_ESL.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1027_ESL;1;0.8689338256921862;0.8499074519865497;29802 -1027_ESL;2;0.867758019976077;0.8551105370726506;22118 -1027_ESL;3;0.8674199139497978;0.8649631203850897;860 -1027_ESL;4;0.8633000304601383;0.8785615546171678;15795 -1027_ESL;5;0.8698221273060668;0.8600147256664417;21575 -1027_ESL;6;0.8696495006874415;0.8514352495870234;5390 +1027_ESL;1;0.8748909209247202;0.8555242762139903;29802 +1027_ESL;2;0.8727993056735073;0.8687545863990538;22118 +1027_ESL;3;0.8655788225418036;0.8922231312905544;860 +1027_ESL;4;0.867303061983018;0.8916465151833094;15795 +1027_ESL;5;0.8736669432607488;0.8273969888641126;21575 +1027_ESL;6;0.8730603777627965;0.8521808175811904;5390 1027_ESL;7;0.8736255583112233;0.8612788857378055;11964 -1027_ESL;8;0.8693744204394314;0.8642829832699518;6265 -1027_ESL;9;0.8677081484872072;0.8706866048077533;23654 -1027_ESL;10;0.8826240353573471;0.8321416684717158;11284 +1027_ESL;8;0.8694653006180479;0.8643633497900758;6265 +1027_ESL;9;0.871058107389737;0.858477048277418;23654 +1027_ESL;10;0.8800564058940745;0.8278137293745264;11284 diff --git a/bench/results/1028_SWD.csv b/bench/results/1028_SWD.csv index 57623f5..1bfea0f 100644 --- a/bench/results/1028_SWD.csv +++ b/bench/results/1028_SWD.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1028_SWD;1;0.41974736363097087;0.38591151944299507;29802 -1028_SWD;2;0.4517804313326744;0.2690526640849469;22118 -1028_SWD;3;0.40896369588755643;0.40529205916548894;860 -1028_SWD;4;0.42619313209794185;0.344849508584405;15795 -1028_SWD;5;0.43293630350349965;0.293534364945674;21575 -1028_SWD;6;0.42481715418024246;0.34221390442067867;5390 -1028_SWD;7;0.43529973436124514;0.3155602583077638;11964 -1028_SWD;8;0.45109071433080694;0.24163364087292183;6265 -1028_SWD;9;0.411954827512946;0.4035198144230855;23654 -1028_SWD;10;0.41881380364353205;0.35402482965759763;11284 +1028_SWD;1;0.4373125219729864;0.3591733546209136;29802 +1028_SWD;2;0.45067849050801434;0.27734018436872376;22118 +1028_SWD;3;0.432540658536116;0.39640604600169704;860 +1028_SWD;4;0.4383346360723498;0.35539252873090454;15795 +1028_SWD;5;0.4351837209211017;0.34356249269993755;21575 +1028_SWD;6;0.45077777466457414;0.331246467426907;5390 +1028_SWD;7;0.45774565018987534;0.31090479799041437;11964 +1028_SWD;8;0.4623140825381532;0.2911353301464765;6265 +1028_SWD;9;0.41985780125866;0.3945376646322808;23654 +1028_SWD;10;0.4362368631680913;0.33558903136597407;11284 diff --git a/bench/results/1029_LEV.csv b/bench/results/1029_LEV.csv index e78f00f..8c5d44b 100644 --- a/bench/results/1029_LEV.csv +++ b/bench/results/1029_LEV.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1029_LEV;1;0.5879780782214657;0.5204583881052793;29802 -1029_LEV;2;0.5789393164986216;0.5676559936258676;22118 -1029_LEV;3;0.5654880596620051;0.6014544173446551;860 -1029_LEV;4;0.6009750767452274;0.5121487884369427;15795 -1029_LEV;5;0.5820556772161638;0.5630880040408148;21575 -1029_LEV;6;0.579662627460287;0.5580064249328927;5390 -1029_LEV;7;0.5628308404387818;0.6082523494360423;11964 -1029_LEV;8;0.5919320061434583;0.5239553108909693;6265 -1029_LEV;9;0.5869421867148492;0.5407588303901149;23654 -1029_LEV;10;0.5610457958242968;0.5759109213194484;11284 +1029_LEV;1;0.5844889767108543;0.5241174789850113;29802 +1029_LEV;2;0.5755651903150614;0.549427209698127;22118 +1029_LEV;3;0.566429372563344;0.6028440243460665;860 +1029_LEV;4;0.6004406005484593;0.508368018104046;15795 +1029_LEV;5;0.5862117839676109;0.5581402773171422;21575 +1029_LEV;6;0.5816449064797491;0.5569798462643671;5390 +1029_LEV;7;0.5606708615537527;0.6073771024576097;11964 +1029_LEV;8;0.5942136638927331;0.5204119748868217;6265 +1029_LEV;9;0.5866779188098024;0.5442624434496006;23654 +1029_LEV;10;0.5647757142106716;0.6027015429520087;11284 diff --git a/bench/results/1030_ERA.csv b/bench/results/1030_ERA.csv index 63b244e..9db9361 100644 --- a/bench/results/1030_ERA.csv +++ b/bench/results/1030_ERA.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1030_ERA;1;0.355946366178447;0.45130876083441573;29802 -1030_ERA;2;0.37290093828465165;0.4120055153836605;22118 -1030_ERA;3;0.3705570168279426;0.4195764121745853;860 -1030_ERA;4;0.38834742453119286;0.38122326189069244;15795 -1030_ERA;5;0.3890787660989249;0.3897748494190886;21575 -1030_ERA;6;0.39790727005128923;0.3652092964276171;5390 -1030_ERA;7;0.39686209532437067;0.34974236928922453;11964 -1030_ERA;8;0.40678859411790114;0.31814387585475756;6265 -1030_ERA;9;0.3923063405726871;0.3400834590518873;23654 -1030_ERA;10;0.405026041012461;0.3124839043779347;11284 +1030_ERA;1;0.3682264585639765;0.45000370188951255;29802 +1030_ERA;2;0.3703982945293156;0.4484664334429165;22118 +1030_ERA;3;0.3694747321551356;0.43183742625673993;860 +1030_ERA;4;0.3890483637162687;0.3854357692512782;15795 +1030_ERA;5;0.38444234563787705;0.39243117425859775;21575 +1030_ERA;6;0.39800028913849717;0.3775160066188339;5390 +1030_ERA;7;0.39553784297250216;0.370361659063548;11964 +1030_ERA;8;0.40500650637303737;0.3288319726806165;6265 +1030_ERA;9;0.3989662123825213;0.3501745992492785;23654 +1030_ERA;10;0.3986027031981597;0.3161933285379288;11284 diff --git a/bench/results/1089_USCrime.csv b/bench/results/1089_USCrime.csv index 70afd1f..c686129 100644 --- a/bench/results/1089_USCrime.csv +++ b/bench/results/1089_USCrime.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1089_USCrime;1;0.9484139509443629;0.8493853523348904;29802 -1089_USCrime;2;0.9572058470347506;0.81935543633297;22118 -1089_USCrime;3;0.9520102598873852;0.4540434382838121;860 -1089_USCrime;4;0.9513053045283997;0.74447123868373;15795 -1089_USCrime;5;0.9418779030982185;0.7130163733303321;21575 -1089_USCrime;6;0.943966044212423;0.8377965052540635;5390 -1089_USCrime;7;0.9583551955947374;0.7145147539996739;11964 -1089_USCrime;8;0.9563572028892029;0.7684135499103156;6265 -1089_USCrime;9;0.9507182479552773;0.66950062207029;23654 -1089_USCrime;10;0.9449242331234132;0.8216779253190278;11284 +1089_USCrime;1;0.969037608964617;0.7483367280948074;29802 +1089_USCrime;2;0.9686601906897321;0.7314333167731497;22118 +1089_USCrime;3;0.9628279995212059;0.496644706237765;860 +1089_USCrime;4;0.9661516464623952;0.7308406230327349;15795 +1089_USCrime;5;0.9279961242910364;0.9028921075202149;21575 +1089_USCrime;6;0.9620435535455483;0.8035834139223468;5390 +1089_USCrime;7;0.9721744001998275;0.4316886473558669;11964 +1089_USCrime;8;0.9667952188063021;0.776848816174635;6265 +1089_USCrime;9;0.9679690797919094;0.7503110908067083;23654 +1089_USCrime;10;0.9573742889963757;0.7093714233399555;11284 diff --git a/bench/results/1096_FacultySalaries.csv b/bench/results/1096_FacultySalaries.csv index 1513654..027b6ee 100644 --- a/bench/results/1096_FacultySalaries.csv +++ b/bench/results/1096_FacultySalaries.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -1096_FacultySalaries;1;0.9575028902765571;0.8184481267043824;29802 -1096_FacultySalaries;2;0.9568076672378055;0.917434133842035;22118 -1096_FacultySalaries;3;0.9669940054640627;0.8934372879712575;860 -1096_FacultySalaries;4;0.9996757503744214;0.1871532006638934;15795 -1096_FacultySalaries;5;0.9515616447901358;0.8945706203228898;21575 -1096_FacultySalaries;6;0.9981737422612984;-0.6885712121152572;5390 -1096_FacultySalaries;7;0.9795965157536484;0.8997591808890962;11964 -1096_FacultySalaries;8;0.977426037817008;0.8391699685985927;6265 -1096_FacultySalaries;9;0.9684482341572983;0.9618854615626562;23654 -1096_FacultySalaries;10;0.963343554614357;0.8986187469236837;11284 +1096_FacultySalaries;1;0.9757811801086493;0.8468690082448429;29802 +1096_FacultySalaries;2;0.9747072184400877;0.5559189268086008;22118 +1096_FacultySalaries;3;0.9602495975861516;0.6315686809951979;860 +1096_FacultySalaries;4;0.9996757503744214;0.18715319947834508;15795 +1096_FacultySalaries;5;0.9733547803551156;0.9447964356602097;21575 +1096_FacultySalaries;6;0.998173742238267;-0.6885706555756188;5390 +1096_FacultySalaries;7;0.9787889343295242;0.9232090362546187;11964 +1096_FacultySalaries;8;0.9648100352407598;0.8257600154636463;6265 +1096_FacultySalaries;9;0.9792443170894077;0.960647215075818;23654 +1096_FacultySalaries;10;0.9813331788413356;0.8844476997285342;11284 diff --git a/bench/results/192_vineyard.csv b/bench/results/192_vineyard.csv index 89bd708..f8b042b 100644 --- a/bench/results/192_vineyard.csv +++ b/bench/results/192_vineyard.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -192_vineyard;1;0.8758445698235056;0.2236397944668479;29802 -192_vineyard;2;0.858616424851334;0.13271060525956901;22118 -192_vineyard;3;0.8537137523938062;0.4780805511251951;860 -192_vineyard;4;0.8692894908232239;0.3758608681462412;15795 -192_vineyard;5;0.87608336889845;0.4455659090090176;21575 -192_vineyard;6;0.8776088928103271;0.3460864519849878;5390 -192_vineyard;7;0.8384373653693135;0.7195907291058308;11964 -192_vineyard;8;0.8223940400933747;0.08607142817359648;6265 -192_vineyard;9;0.8425098503334608;0.6264516080030693;23654 -192_vineyard;10;0.8929706868907198;0.38449967469030966;11284 +192_vineyard;1;0.8708853934183917;-0.4248209450726521;29802 +192_vineyard;2;0.8569606516417794;-0.42892415246500093;22118 +192_vineyard;3;0.8829022683683329;0.09498433432956144;860 +192_vineyard;4;0.8956247954424682;0.15671556194470315;15795 +192_vineyard;5;0.8912368002572013;0.22225606881786264;21575 +192_vineyard;6;0.8829681580907572;0.2039008023908344;5390 +192_vineyard;7;0.8457254813152156;0.47914507962376474;11964 +192_vineyard;8;0.8352999677222162;0.5832638534915283;6265 +192_vineyard;9;0.8619800115440981;0.5736448841534942;23654 +192_vineyard;10;0.8912822500084658;0.5882196088719382;11284 diff --git a/bench/results/197_cpu_act.csv b/bench/results/197_cpu_act.csv index d2e68e9..8e3c4cb 100644 --- a/bench/results/197_cpu_act.csv +++ b/bench/results/197_cpu_act.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -197_cpu_act;1;0.9387934899514239;0.9451106322160039;29802 -197_cpu_act;2;0.9333639615224261;0.9290571513295527;22118 -197_cpu_act;3;0.9530034270392485;0.9503027779498951;860 -197_cpu_act;4;0.9470401775214541;0.9451628075750368;15795 -197_cpu_act;5;0.9427855190899835;0.9461699935502532;21575 -197_cpu_act;6;0.9471825707278558;0.9506166699567961;5390 -197_cpu_act;7;0.9564062751447067;0.9520205277468109;11964 -197_cpu_act;8;0.9540432896569563;0.9512028699925679;6265 -197_cpu_act;9;0.9382877197734281;0.9361704125648529;23654 -197_cpu_act;10;0.9374270754243184;0.9250902535585799;11284 +197_cpu_act;1;0.9457665877454394;0.9436749078434319;29802 +197_cpu_act;2;0.9421764711644052;0.9358882728244855;22118 +197_cpu_act;3;0.946313954464179;0.9471130286100041;860 +197_cpu_act;4;0.8496781432845921;0.8582846660255161;15795 +197_cpu_act;5;0.9517077250675727;0.9530442805581179;21575 +197_cpu_act;6;0.9477005153417889;0.9502016383212369;5390 +197_cpu_act;7;0.9486233817092022;0.938278739172296;11964 +197_cpu_act;8;0.9597354596296003;0.9555905049338825;6265 +197_cpu_act;9;0.9648960107234625;0.9634820840877398;23654 +197_cpu_act;10;0.9577340006506451;0.95184836883823;11284 diff --git a/bench/results/210_cloud.csv b/bench/results/210_cloud.csv index f881679..671b300 100644 --- a/bench/results/210_cloud.csv +++ b/bench/results/210_cloud.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -210_cloud;1;0.9247636146137266;0.6455604410596569;29802 -210_cloud;2;0.9343808533567021;0.8770082489331003;22118 -210_cloud;3;0.9311210811645867;0.7127490256169635;860 -210_cloud;4;0.934936620286743;0.9362826105251004;15795 -210_cloud;5;0.9482920134801041;0.9082722075947439;21575 -210_cloud;6;0.942302095803146;0.7227604098373297;5390 -210_cloud;7;0.9505642703201787;0.8504613720142944;11964 -210_cloud;8;0.9360740181207674;0.6395164814517422;6265 -210_cloud;9;0.9298362563595642;0.8894142340721412;23654 -210_cloud;10;0.9517720645617664;0.43475344549546946;11284 +210_cloud;1;0.9362762610302084;0.8691901588735473;29802 +210_cloud;2;0.932913153743925;0.8880995514125988;22118 +210_cloud;3;0.9342923431663721;0.7491585711117132;860 +210_cloud;4;0.9439602033234796;0.8988071127272331;15795 +210_cloud;5;0.9530786572643501;0.8442892691784296;21575 +210_cloud;6;0.9451596735432771;0.8376976230836759;5390 +210_cloud;7;0.9474825821384996;0.8673292587973871;11964 +210_cloud;8;0.9364317165249529;0.5617672429308037;6265 +210_cloud;9;0.9265135160166141;0.9456328880138059;23654 +210_cloud;10;0.9565081594889666;0.39315858636610057;11284 diff --git a/bench/results/225_puma8NH.csv b/bench/results/225_puma8NH.csv index 9dadd8a..214bda0 100644 --- a/bench/results/225_puma8NH.csv +++ b/bench/results/225_puma8NH.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -225_puma8NH;1;0.6705324281799827;0.6469760131302198;29802 -225_puma8NH;2;0.6529416250592445;0.6648667830014574;22118 -225_puma8NH;3;0.668872418199597;0.6691440462049962;860 -225_puma8NH;4;0.6698248185101057;0.669578706012151;15795 -225_puma8NH;5;0.6806162807788343;0.6962678620000038;21575 -225_puma8NH;6;0.6557342826799404;0.6791402248205318;5390 -225_puma8NH;7;0.6705031948803878;0.668675527501577;11964 -225_puma8NH;8;0.6696418552266217;0.6668672891980192;6265 -225_puma8NH;9;0.6809417218834979;0.6641877090939536;23654 -225_puma8NH;10;0.6778308230365331;0.6596977025598376;11284 +225_puma8NH;1;0.682850204435814;0.6789211747963135;29802 +225_puma8NH;2;0.6754836444737324;0.6930220241212716;22118 +225_puma8NH;3;0.6761584240959573;0.6747595339118583;860 +225_puma8NH;4;0.6685405891987423;0.6638774570350612;15795 +225_puma8NH;5;0.6778781214915794;0.692070342886109;21575 +225_puma8NH;6;0.6753263424574281;0.6922572411975378;5390 +225_puma8NH;7;0.6209830017302066;0.6273467897767163;11964 +225_puma8NH;8;0.6777014266663415;0.6799644712287625;6265 +225_puma8NH;9;0.6822693621746627;0.666476328548008;23654 +225_puma8NH;10;0.6814646567138007;0.6671700746406766;11284 diff --git a/bench/results/227_cpu_small.csv b/bench/results/227_cpu_small.csv index 752484b..75c8bfc 100644 --- a/bench/results/227_cpu_small.csv +++ b/bench/results/227_cpu_small.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -227_cpu_small;1;0.9540370888709729;0.9609058793159982;29802 -227_cpu_small;2;0.9488562687403959;0.9436280757904487;22118 -227_cpu_small;3;0.9504429109686965;0.9520600530975872;860 -227_cpu_small;4;0.9461015247186403;0.9447682989399491;15795 -227_cpu_small;5;0.9533298056263266;0.9529900399919129;21575 -227_cpu_small;6;0.9473725234398761;0.9502877717844496;5390 -227_cpu_small;7;0.9482536236025996;0.939552292545259;11964 -227_cpu_small;8;0.9457544827848126;0.9454205410011709;6265 -227_cpu_small;9;0.9455531784001469;0.9339190396781905;23654 -227_cpu_small;10;0.9476693676687382;0.9374238976803586;11284 +227_cpu_small;1;0.9511459412072559;0.9560957918072145;29802 +227_cpu_small;2;0.9567873549993458;0.9540203418129014;22118 +227_cpu_small;3;0.953209617799769;0.9529617190481161;860 +227_cpu_small;4;0.9547184045361405;0.9564869218621034;15795 +227_cpu_small;5;0.9522737226764634;0.9530777748357575;21575 +227_cpu_small;6;0.9496651602539978;0.9526532237472574;5390 +227_cpu_small;7;0.9480917341278418;0.9394283620886283;11964 +227_cpu_small;8;0.9569652130042856;0.957357703480119;6265 +227_cpu_small;9;0.9578391420110457;0.9528086472247773;23654 +227_cpu_small;10;0.9408407489212279;0.9244148967255907;11284 diff --git a/bench/results/228_elusage.csv b/bench/results/228_elusage.csv index eaacfac..838225e 100644 --- a/bench/results/228_elusage.csv +++ b/bench/results/228_elusage.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -228_elusage;1;0.9135086327699563;0.37504569393812115;29802 -228_elusage;2;0.8970128288923932;0.35103778213392034;22118 -228_elusage;3;0.8886296814417715;0.7196904542426055;860 -228_elusage;4;0.8704194422338686;0.6653540966273213;15795 -228_elusage;5;0.8917017093876557;0.8306453559482805;21575 +228_elusage;1;0.9311419726673622;0.2350684821794946;29802 +228_elusage;2;0.9025813528838752;0.6364760689448354;22118 +228_elusage;3;0.9110242745696653;0.5792736949769591;860 +228_elusage;4;0.9053214518223174;0.7273002285110786;15795 +228_elusage;5;0.8985627851722835;0.8097852281164157;21575 228_elusage;6;0.8936773561445938;0.7160706477254897;5390 -228_elusage;7;0.8900002753872707;0.8345048286306371;11964 -228_elusage;8;0.9003394088878686;0.7726511311327471;6265 -228_elusage;9;0.9209220925229495;0.7121838299498349;23654 -228_elusage;10;0.9349071079768176;0.236846671035417;11284 +228_elusage;7;0.8997277559837891;0.7979770642133817;11964 +228_elusage;8;0.9030298795272532;0.7540088410298927;6265 +228_elusage;9;0.9150788651356546;0.7846289337518353;23654 +228_elusage;10;0.9403025397412127;0.201718727319101;11284 diff --git a/bench/results/229_pwLinear.csv b/bench/results/229_pwLinear.csv index 68d750c..bb98d9e 100644 --- a/bench/results/229_pwLinear.csv +++ b/bench/results/229_pwLinear.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -229_pwLinear;1;0.8866837244005004;0.8437360911399274;29802 -229_pwLinear;2;0.8651340740201751;0.8154600805428582;22118 -229_pwLinear;3;0.8780570695512598;0.8521667614863495;860 -229_pwLinear;4;0.8692410221184963;0.7966514010453299;15795 -229_pwLinear;5;0.871139368125647;0.8615298223960618;21575 -229_pwLinear;6;0.8476702830738368;0.7375750682294989;5390 -229_pwLinear;7;0.8688776993339421;0.8070841755676031;11964 -229_pwLinear;8;0.8700464875140115;0.8526734480098382;6265 -229_pwLinear;9;0.8761042568651296;0.8079745580972737;23654 -229_pwLinear;10;0.8622494412617301;0.7345898519017362;11284 +229_pwLinear;1;0.887097489437163;0.8468543043562392;29802 +229_pwLinear;2;0.8968688038944086;0.8903898549781959;22118 +229_pwLinear;3;0.898360896148988;0.8861303015560574;860 +229_pwLinear;4;0.9083144011127211;0.8661698780989783;15795 +229_pwLinear;5;0.9108269241904139;0.8685153004743829;21575 +229_pwLinear;6;0.8967604755237106;0.8618639847110794;5390 +229_pwLinear;7;0.908072954770001;0.8262182908899607;11964 +229_pwLinear;8;0.9131158810203344;0.8696165164408076;6265 +229_pwLinear;9;0.8940989954069053;0.8593211470909712;23654 +229_pwLinear;10;0.8996144256003755;0.7904577840031168;11284 diff --git a/bench/results/230_machine_cpu.csv b/bench/results/230_machine_cpu.csv index 7ce4242..fb53e2a 100644 --- a/bench/results/230_machine_cpu.csv +++ b/bench/results/230_machine_cpu.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -230_machine_cpu;1;0.9480533853347944;0.8992187802149694;29802 -230_machine_cpu;2;0.9522645085936048;0.9225522264452863;22118 -230_machine_cpu;3;0.9470518003228451;0.8981659347479933;860 -230_machine_cpu;4;0.95506023413372;0.8888056875723519;15795 -230_machine_cpu;5;0.9385079910061537;0.7774941119015837;21575 -230_machine_cpu;6;0.9380208946077939;0.9072710334554801;5390 -230_machine_cpu;7;0.9684008083925838;0.013214346890453976;11964 -230_machine_cpu;8;0.9450825586427529;0.7851694917284495;6265 -230_machine_cpu;9;0.95029831052323;0.8705442088244373;23654 -230_machine_cpu;10;0.949887276067157;0.8269905463822742;11284 +230_machine_cpu;1;0.9534204477578466;0.9425716802553034;29802 +230_machine_cpu;2;0.9534857556685918;0.9205756992252286;22118 +230_machine_cpu;3;0.9641909352375859;0.8914275532122926;860 +230_machine_cpu;4;0.9570109315062694;0.9189267900502148;15795 +230_machine_cpu;5;0.9376701255761473;0.6273145810017067;21575 +230_machine_cpu;6;0.9598033161097971;-0.7349430092852289;5390 +230_machine_cpu;7;0.9678673237923466;0.6179567298323092;11964 +230_machine_cpu;8;0.9519821921693107;-0.058671938296462445;6265 +230_machine_cpu;9;0.9554845489984238;0.9365405947760388;23654 +230_machine_cpu;10;0.9570806497148374;0.5510813323223882;11284 diff --git a/bench/results/485_analcatdata_vehicle.csv b/bench/results/485_analcatdata_vehicle.csv index f3135a8..b3f0683 100644 --- a/bench/results/485_analcatdata_vehicle.csv +++ b/bench/results/485_analcatdata_vehicle.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -485_analcatdata_vehicle;1;0.9528804885155496;0.12462529915348497;29802 -485_analcatdata_vehicle;2;0.9193082516437283;0.7750424433097705;22118 -485_analcatdata_vehicle;3;0.9435440299115576;0.2868891930248415;860 -485_analcatdata_vehicle;4;0.9264417878034139;-0.3336271372677764;15795 -485_analcatdata_vehicle;5;0.9213890151626498;0.643247847518012;21575 -485_analcatdata_vehicle;6;0.9566428252568013;-1.4892996326257548;5390 -485_analcatdata_vehicle;7;0.9272197383492176;0.346071719920654;11964 -485_analcatdata_vehicle;8;0.9510147018118773;0.7884736384082681;6265 -485_analcatdata_vehicle;9;0.9204605428326906;0.7038232035808454;23654 -485_analcatdata_vehicle;10;0.9082079384379624;0.5955875525497412;11284 +485_analcatdata_vehicle;1;0.9634778391300125;-0.11935196018010763;29802 +485_analcatdata_vehicle;2;0.9166635964968202;0.738115759107596;22118 +485_analcatdata_vehicle;3;0.9581223359446156;0.22490388311863863;860 +485_analcatdata_vehicle;4;0.9434113587122004;-0.09002631205464096;15795 +485_analcatdata_vehicle;5;0.9344680302958008;0.4562061291302909;21575 +485_analcatdata_vehicle;6;0.966115635104376;-1.54536586208844;5390 +485_analcatdata_vehicle;7;0.9243469464676076;-0.13493443901992674;11964 +485_analcatdata_vehicle;8;0.9624226065048201;0.38377459453213936;6265 +485_analcatdata_vehicle;9;0.9074311394484039;0.7667116771104026;23654 +485_analcatdata_vehicle;10;0.9531144867120844;0.944325808202833;11284 diff --git a/bench/results/503_wind.csv b/bench/results/503_wind.csv index 589aff4..2c2692a 100644 --- a/bench/results/503_wind.csv +++ b/bench/results/503_wind.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -503_wind;1;0.7588522944498123;0.7407479193988407;29802 -503_wind;2;0.7396776236929041;0.7682653284848018;22118 -503_wind;3;0.7615843309597021;0.7410864079085691;860 -503_wind;4;0.7486995174805298;0.7419028061049862;15795 -503_wind;5;0.7566183637744479;0.7509473874232699;21575 -503_wind;6;0.7478737982529451;0.744456502771982;5390 -503_wind;7;0.7518204636981045;0.7517045259363225;11964 -503_wind;8;0.7485226285602495;0.7370083668827148;6265 -503_wind;9;0.7499772956159411;0.7494742142933108;23654 -503_wind;10;0.7641226246553331;0.7471169274375125;11284 +503_wind;1;0.7658257110596496;0.7513438937058385;29802 +503_wind;2;0.7512686937236537;0.7803039547604709;22118 +503_wind;3;0.7685394489029556;0.7539174994433234;860 +503_wind;4;0.7529862535285444;0.7465336965191283;15795 +503_wind;5;0.7543560769127313;0.7466733117882052;21575 +503_wind;6;0.7682431860933203;0.7608158068041664;5390 +503_wind;7;0.7677307665246877;0.7657598529325538;11964 +503_wind;8;0.7545945506267047;0.7440009171727796;6265 +503_wind;9;0.7654443094871098;0.7552019874033794;23654 +503_wind;10;0.7718736265175183;0.7514531599518441;11284 diff --git a/bench/results/505_tecator.csv b/bench/results/505_tecator.csv index 000b58a..40109a8 100644 --- a/bench/results/505_tecator.csv +++ b/bench/results/505_tecator.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -505_tecator;1;0.9854549276326078;0.9824194712517073;29802 -505_tecator;2;0.9900042822567748;0.9888052939802175;22118 -505_tecator;3;0.9856845643580413;0.9870546490414863;860 -505_tecator;4;0.9908573959466794;0.9856740255484958;15795 -505_tecator;5;0.9846678490714278;0.9845605298374047;21575 -505_tecator;6;0.9890515021634928;0.9919294021697467;5390 -505_tecator;7;0.9875095460959086;0.99457541711821;11964 -505_tecator;8;0.9909905285623747;0.9863775705197315;6265 -505_tecator;9;0.9849433268275737;0.9838444019508767;23654 -505_tecator;10;0.9850237224054832;0.983372108736585;11284 +505_tecator;1;0.9927454620736509;0.9893053879987541;29802 +505_tecator;2;0.9879329328341779;0.9875747440972731;22118 +505_tecator;3;0.9880041142690137;0.9908431622650101;860 +505_tecator;4;0.9865817685899063;0.9733492090347696;15795 +505_tecator;5;0.9813563895788477;0.9818628960287505;21575 +505_tecator;6;0.9873087042639579;0.9830617334029935;5390 +505_tecator;7;0.9871679365587726;0.9925495243949886;11964 +505_tecator;8;0.9924076490023038;0.9880707404120649;6265 +505_tecator;9;0.9823654866433313;0.9796968419455596;23654 +505_tecator;10;0.9889690723394612;0.9900780662217898;11284 diff --git a/bench/results/519_vinnie.csv b/bench/results/519_vinnie.csv index 8643132..ba57eee 100644 --- a/bench/results/519_vinnie.csv +++ b/bench/results/519_vinnie.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -519_vinnie;1;0.778064332036953;0.6748797138263742;29802 -519_vinnie;2;0.7670949723082622;0.7011612077288334;22118 -519_vinnie;3;0.7404644581023311;0.7810698905726365;860 -519_vinnie;4;0.7557205123459001;0.7422420153961651;15795 -519_vinnie;5;0.7713875709908657;0.6900299487722288;21575 -519_vinnie;6;0.771845486331053;0.7108348972477119;5390 -519_vinnie;7;0.7572075674163686;0.7147332995367801;11964 -519_vinnie;8;0.7429626211025838;0.7790843508176507;6265 -519_vinnie;9;0.7490936656635323;0.769534352985476;23654 -519_vinnie;10;0.7635739787535201;0.7251627064505871;11284 +519_vinnie;1;0.778064332036953;0.6748797138294724;29802 +519_vinnie;2;0.7670949723082623;0.7011612082750185;22118 +519_vinnie;3;0.7404644581023311;0.7810698905254343;860 +519_vinnie;4;0.7557205123459001;0.7422420153780349;15795 +519_vinnie;5;0.7753683470514572;0.6823505255947264;21575 +519_vinnie;6;0.767887083813139;0.712071518327847;5390 +519_vinnie;7;0.7572075674163686;0.7147332995223332;11964 +519_vinnie;8;0.7542153901449076;0.7587488135901574;6265 +519_vinnie;9;0.7467373888739486;0.7694240036664656;23654 +519_vinnie;10;0.7654651757060934;0.709533343161777;11284 diff --git a/bench/results/522_pm10.csv b/bench/results/522_pm10.csv index 3cc3919..c1f9b0a 100644 --- a/bench/results/522_pm10.csv +++ b/bench/results/522_pm10.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -522_pm10;1;0.3794629609652548;0.23026217474125876;29802 -522_pm10;2;0.3570901484992305;0.17469754534535442;22118 -522_pm10;3;0.3858768939919651;0.22089941955788794;860 -522_pm10;4;0.40768989512635856;0.2175177323761076;15795 -522_pm10;5;0.36976729777293194;0.17113501390378316;21575 -522_pm10;6;0.3428926408914911;0.2687256805922681;5390 -522_pm10;7;0.2977757786654729;0.3227886923695936;11964 -522_pm10;8;0.38178504083381426;0.23595564673110503;6265 -522_pm10;9;0.37190883423115706;0.25238282154873437;23654 -522_pm10;10;0.3409559374024058;0.256704861294187;11284 +522_pm10;1;0.3916341590588258;0.17209848107968018;29802 +522_pm10;2;0.35792961622561625;0.30397871102618823;22118 +522_pm10;3;0.4056678696094712;0.19323591913835292;860 +522_pm10;4;0.43280165362554635;0.1816551573427727;15795 +522_pm10;5;0.36451120577785767;0.2521331139314824;21575 +522_pm10;6;0.3837705593480236;0.29914054537556134;5390 +522_pm10;7;0.36168075537858246;0.2292153742058889;11964 +522_pm10;8;0.39298781249188663;0.21544925318803332;6265 +522_pm10;9;0.3805038157810552;0.2684606306177675;23654 +522_pm10;10;0.356206203126046;0.31306398249856837;11284 diff --git a/bench/results/523_analcatdata_neavote.csv b/bench/results/523_analcatdata_neavote.csv index 31c28e7..11ac6f1 100644 --- a/bench/results/523_analcatdata_neavote.csv +++ b/bench/results/523_analcatdata_neavote.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed 523_analcatdata_neavote;1;0.9582300334612134;0.9386714303591461;29802 -523_analcatdata_neavote;2;0.9638237885042875;0.9152095576783321;22118 -523_analcatdata_neavote;3;0.9637411693056734;0.911374339211446;860 -523_analcatdata_neavote;4;0.9536128203238623;0.9484574870437463;15795 -523_analcatdata_neavote;5;0.9528400818699697;0.9555480753370238;21575 +523_analcatdata_neavote;2;0.9638237885042875;0.9152095576795443;22118 +523_analcatdata_neavote;3;0.9637411693056734;0.9113743393154917;860 +523_analcatdata_neavote;4;0.9536128203238623;0.9484574870125133;15795 +523_analcatdata_neavote;5;0.9528400818699697;0.9555480753168316;21575 523_analcatdata_neavote;6;0.9469477274013106;0.9694100916316685;5390 -523_analcatdata_neavote;7;0.9656529293299565;0.9032444495818659;11964 +523_analcatdata_neavote;7;0.9656529293299565;0.9032444495722483;11964 523_analcatdata_neavote;8;0.9475894369194252;0.8977943324063037;6265 -523_analcatdata_neavote;9;0.9534332217559968;0.951120289477556;23654 -523_analcatdata_neavote;10;0.9466394799975569;0.9749431974558286;11284 +523_analcatdata_neavote;9;0.9534332217559968;0.9511202894395492;23654 +523_analcatdata_neavote;10;0.9466394799975569;0.9749431974411712;11284 diff --git a/bench/results/527_analcatdata_election2000.csv b/bench/results/527_analcatdata_election2000.csv index ea201cf..970c86a 100644 --- a/bench/results/527_analcatdata_election2000.csv +++ b/bench/results/527_analcatdata_election2000.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -527_analcatdata_election2000;1;0.999144300466917;0.9992466998776212;29802 -527_analcatdata_election2000;2;0.9990232530092271;0.9999001075236819;22118 -527_analcatdata_election2000;3;0.9998277174836239;0.9980663629600459;860 -527_analcatdata_election2000;4;0.9991430940015354;0.9993349896951212;15795 -527_analcatdata_election2000;5;0.9990466772472578;0.9992996263636201;21575 -527_analcatdata_election2000;6;0.9997656762303844;0.9945511921408846;5390 -527_analcatdata_election2000;7;0.9991289509891338;0.999578298400491;11964 -527_analcatdata_election2000;8;0.9969921980601382;0.9992001493211001;6265 +527_analcatdata_election2000;1;0.999144300466917;0.9992466998829244;29802 +527_analcatdata_election2000;2;0.9990232530092271;0.9999001075222042;22118 +527_analcatdata_election2000;3;0.9998277174836239;0.9980663629616184;860 +527_analcatdata_election2000;4;0.998392150905119;0.9884068132502254;15795 +527_analcatdata_election2000;5;0.9977115019194641;0.9809805075038193;21575 +527_analcatdata_election2000;6;0.9997656762303844;0.9945511921405661;5390 +527_analcatdata_election2000;7;0.9977647930464765;0.9956179204909581;11964 +527_analcatdata_election2000;8;0.996513739803008;0.990214315771051;6265 527_analcatdata_election2000;9;0.9972831883429463;0.9885665336066449;23654 -527_analcatdata_election2000;10;0.9991493426875013;0.9995293788943556;11284 +527_analcatdata_election2000;10;0.9926124817775727;0.9874342630385744;11284 diff --git a/bench/results/529_pollen.csv b/bench/results/529_pollen.csv index 4266b21..3f966a9 100644 --- a/bench/results/529_pollen.csv +++ b/bench/results/529_pollen.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -529_pollen;1;0.7936416714066328;0.7762755533706083;29802 -529_pollen;2;0.796441760348724;0.7858222762585879;22118 -529_pollen;3;0.7991947939416362;0.7785509246582917;860 -529_pollen;4;0.7932467920271737;0.796104280090784;15795 -529_pollen;5;0.7842935305604609;0.803699353717669;21575 +529_pollen;1;0.7936416714066328;0.7762755533706103;29802 +529_pollen;2;0.7964417603487056;0.7858222739606016;22118 +529_pollen;3;0.7991947939416348;0.7785509261564657;860 +529_pollen;4;0.7932467920271723;0.7961042799605124;15795 +529_pollen;5;0.7842935305604609;0.803699353603365;21575 529_pollen;6;0.7939167974013728;0.7750304204729883;5390 -529_pollen;7;0.7986415668983129;0.7788932489809886;11964 -529_pollen;8;0.7929956829937825;0.7969014128961736;6265 -529_pollen;9;0.7930964536245204;0.7764647903809114;23654 -529_pollen;10;0.7903534660577021;0.8044480932738134;11284 +529_pollen;7;0.798641566896627;0.7788932670570133;11964 +529_pollen;8;0.7929956828151925;0.7969016425934032;6265 +529_pollen;9;0.7930964536245204;0.7764647903809226;23654 +529_pollen;10;0.7903534660554659;0.8044481477208689;11284 diff --git a/bench/results/542_pollution.csv b/bench/results/542_pollution.csv index 7c3d968..05795a9 100644 --- a/bench/results/542_pollution.csv +++ b/bench/results/542_pollution.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -542_pollution;1;0.8843090161282399;0.05607053038809462;29802 -542_pollution;2;0.848075985418739;0.3281553390994669;22118 -542_pollution;3;0.8658478989803343;-0.08321163170177104;860 -542_pollution;4;0.8558789844642788;0.4006788617242786;15795 -542_pollution;5;0.8294760057362275;0.4256112830204646;21575 -542_pollution;6;0.8225265855736608;-0.2164999401404657;5390 -542_pollution;7;0.8307172638181622;0.3085715201318613;11964 -542_pollution;8;0.8524654882645428;0.4170939452042327;6265 -542_pollution;9;0.8546598719974106;0.2500854971061002;23654 -542_pollution;10;0.8935182596205669;-0.18564325205885357;11284 +542_pollution;1;0.8958123319267871;0.2467419921731796;29802 +542_pollution;2;0.8553987122384332;0.06681060546122841;22118 +542_pollution;3;0.901749416217652;-1.0646079898145016;860 +542_pollution;4;0.8212024550371452;0.5712230275066958;15795 +542_pollution;5;0.8308810750601717;0.47660399106887996;21575 +542_pollution;6;0.859618323819151;-6.3823947444965325;5390 +542_pollution;7;0.852300513461618;0.6079421810351266;11964 +542_pollution;8;0.8868310026363309;0.2580559082696334;6265 +542_pollution;9;0.884318021505661;0.48255254547870563;23654 +542_pollution;10;0.9217287073607583;-0.3652033825026302;11284 diff --git a/bench/results/547_no2.csv b/bench/results/547_no2.csv index f3352e1..84cb404 100644 --- a/bench/results/547_no2.csv +++ b/bench/results/547_no2.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -547_no2;1;0.5967818808233845;0.5117018659094545;29802 -547_no2;2;0.5918477650269622;0.48527793365846084;22118 -547_no2;3;0.566715054474527;0.5593972629913926;860 -547_no2;4;0.6062002123888404;0.4718711968127075;15795 -547_no2;5;0.5632592952088873;0.5926921185615248;21575 -547_no2;6;0.6071457437094566;0.2855862060309705;5390 -547_no2;7;0.6256525259080508;0.47387942548679274;11964 -547_no2;8;0.5361250034663553;0.6003916764510544;6265 -547_no2;9;0.5667133241764515;0.5811427774008566;23654 -547_no2;10;0.5982345252966745;0.49426355757686147;11284 +547_no2;1;0.6027579041070649;0.43658099155451946;29802 +547_no2;2;0.6263829279922758;0.3742690727090351;22118 +547_no2;3;0.5773914505135069;0.5688552557706618;860 +547_no2;4;0.623561868307309;0.3786968263091799;15795 +547_no2;5;0.5708335374653255;0.6944555489309868;21575 +547_no2;6;0.6333658568772302;0.3930638879362218;5390 +547_no2;7;0.606079323420359;0.41810913316252274;11964 +547_no2;8;0.5933405445421667;0.5525235957536274;6265 +547_no2;9;0.5797021946144171;0.4658611076735708;23654 +547_no2;10;0.6157384872219682;0.4557987023033202;11284 diff --git a/bench/results/556_analcatdata_apnea2.csv b/bench/results/556_analcatdata_apnea2.csv index 25fbcf5..73309a0 100644 --- a/bench/results/556_analcatdata_apnea2.csv +++ b/bench/results/556_analcatdata_apnea2.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -556_analcatdata_apnea2;1;0.8900070301834665;0.8595879221942337;29802 -556_analcatdata_apnea2;2;0.8758624694790041;0.88941288735778;22118 -556_analcatdata_apnea2;3;0.8665487309615658;0.8581717391435798;860 -556_analcatdata_apnea2;4;0.8886758268480706;0.893920655303757;15795 -556_analcatdata_apnea2;5;0.9162397692621279;0.8339633597457752;21575 -556_analcatdata_apnea2;6;0.867878903871961;0.9060204818046558;5390 -556_analcatdata_apnea2;7;0.895186942418516;0.8318367154488906;11964 -556_analcatdata_apnea2;8;0.8825170446454766;0.8747090101308928;6265 -556_analcatdata_apnea2;9;0.8735327174323235;0.8894505601989673;23654 -556_analcatdata_apnea2;10;0.8891486534240983;0.794500452106343;11284 +556_analcatdata_apnea2;1;0.9061466197187625;0.8446457521446504;29802 +556_analcatdata_apnea2;2;0.9016054699918509;0.8766708937802739;22118 +556_analcatdata_apnea2;3;0.8843539559161282;0.8775424432511525;860 +556_analcatdata_apnea2;4;0.8952595763020793;0.8625884790785422;15795 +556_analcatdata_apnea2;5;0.9136214810272898;0.8302649665981329;21575 +556_analcatdata_apnea2;6;0.8738571786519445;0.8973438896474054;5390 +556_analcatdata_apnea2;7;0.9354432219431367;0.8626424210401881;11964 +556_analcatdata_apnea2;8;0.8925069835747588;0.87321111585474;6265 +556_analcatdata_apnea2;9;0.9081902622651675;0.8788416431394969;23654 +556_analcatdata_apnea2;10;0.8992562268602847;0.8318869294207427;11284 diff --git a/bench/results/557_analcatdata_apnea1.csv b/bench/results/557_analcatdata_apnea1.csv index d09cd2a..1203022 100644 --- a/bench/results/557_analcatdata_apnea1.csv +++ b/bench/results/557_analcatdata_apnea1.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -557_analcatdata_apnea1;1;0.8784986043605206;0.8935157686701525;29802 -557_analcatdata_apnea1;2;0.8805194148319184;0.8854764949913403;22118 -557_analcatdata_apnea1;3;0.8946427063893206;0.9266201317649982;860 -557_analcatdata_apnea1;4;0.8947701057761045;0.912149951414244;15795 -557_analcatdata_apnea1;5;0.9005877753632446;0.8576382929684554;21575 -557_analcatdata_apnea1;6;0.8807628306155243;0.8681637272730844;5390 -557_analcatdata_apnea1;7;0.8856828797455032;0.9116664827122621;11964 -557_analcatdata_apnea1;8;0.9026977882337626;0.8330659053504004;6265 -557_analcatdata_apnea1;9;0.9039998899807843;0.8065397993549838;23654 -557_analcatdata_apnea1;10;0.8767152319664884;0.9193210372003123;11284 +557_analcatdata_apnea1;1;0.8897175339043949;0.9165943930743574;29802 +557_analcatdata_apnea1;2;0.9021908051446247;0.8394505776216514;22118 +557_analcatdata_apnea1;3;0.8979506523943239;0.872037388692701;860 +557_analcatdata_apnea1;4;0.8968447450294665;0.8891633727055719;15795 +557_analcatdata_apnea1;5;0.9135776932586674;0.820866000875306;21575 +557_analcatdata_apnea1;6;0.8859520249350878;0.8400029545273097;5390 +557_analcatdata_apnea1;7;0.9014973550121034;0.9109113022959324;11964 +557_analcatdata_apnea1;8;0.9130402487323359;0.7964036194623167;6265 +557_analcatdata_apnea1;9;0.8825839228597379;0.8044546043084179;23654 +557_analcatdata_apnea1;10;0.9153874936470442;0.9104344086531545;11284 diff --git a/bench/results/560_bodyfat.csv b/bench/results/560_bodyfat.csv index 062e746..e28b184 100644 --- a/bench/results/560_bodyfat.csv +++ b/bench/results/560_bodyfat.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -560_bodyfat;1;0.9902453470772457;0.9908636724386908;29802 -560_bodyfat;2;0.9879227731101468;0.997295801972364;22118 -560_bodyfat;3;0.9877795170598371;0.9647598579980914;860 -560_bodyfat;4;0.9933388192517554;0.9798362796285576;15795 -560_bodyfat;5;0.9884082345394183;0.9987709661807211;21575 -560_bodyfat;6;0.9853465377818014;0.9717951838022242;5390 -560_bodyfat;7;0.9898225525524794;0.9904450422634472;11964 -560_bodyfat;8;0.9872747781111895;0.9985958897705474;6265 -560_bodyfat;9;0.99069869274096;0.9972905797968374;23654 -560_bodyfat;10;0.9894082531733411;0.9950128265303042;11284 +560_bodyfat;1;0.9892294726831697;0.9905519230178037;29802 +560_bodyfat;2;0.989569996973957;-1.0023905305340368;22118 +560_bodyfat;3;0.9890353622478567;0.023626450583330794;860 +560_bodyfat;4;0.9952749801955;0.9797754559364098;15795 +560_bodyfat;5;0.9895968122812513;0.9468185929193085;21575 +560_bodyfat;6;0.9893111554082745;0.9932278402428694;5390 +560_bodyfat;7;0.9919130376162125;0.9615800934213271;11964 +560_bodyfat;8;0.987453072407826;0.9974674810236527;6265 +560_bodyfat;9;0.9896179946304043;0.967619761736427;23654 +560_bodyfat;10;0.9885884513447457;0.09616905703993339;11284 diff --git a/bench/results/561_cpu.csv b/bench/results/561_cpu.csv index 936a931..df8c59a 100644 --- a/bench/results/561_cpu.csv +++ b/bench/results/561_cpu.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -561_cpu;1;0.9889874897385083;0.9875416354431112;29802 -561_cpu;2;0.9924563172967208;0.9396453485837796;22118 -561_cpu;3;0.9930591478466321;0.9831763593573561;860 -561_cpu;4;0.9893079920516502;0.9888341331548184;15795 -561_cpu;5;0.9935164587177503;0.9505509198754009;21575 -561_cpu;6;0.9955894748121411;0.918160058340207;5390 -561_cpu;7;0.9876294231496052;0.8900879100469693;11964 -561_cpu;8;0.9935519611884656;0.9724056733619404;6265 -561_cpu;9;0.9946691362554361;0.9619172509418947;23654 -561_cpu;10;0.9921256198282833;0.9811681931170799;11284 +561_cpu;1;0.9939434872378528;0.9909580678683556;29802 +561_cpu;2;0.9927063636282192;0.9841059476228269;22118 +561_cpu;3;0.9938280882903348;0.9789681103137009;860 +561_cpu;4;0.9960421650522949;0.9934299494920014;15795 +561_cpu;5;0.9945652852459991;0.9633380989778488;21575 +561_cpu;6;0.989688483747185;0.955603115137449;5390 +561_cpu;7;0.9899779973992745;0.9831773412565957;11964 +561_cpu;8;0.9928190414632616;0.9482278058668813;6265 +561_cpu;9;0.9902802892301434;0.9779337464591602;23654 +561_cpu;10;0.9894021678274891;0.991181823498297;11284 diff --git a/bench/results/659_sleuth_ex1714.csv b/bench/results/659_sleuth_ex1714.csv index 7b55182..5ca803f 100644 --- a/bench/results/659_sleuth_ex1714.csv +++ b/bench/results/659_sleuth_ex1714.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -659_sleuth_ex1714;1;0.955631483663761;0.8588762691821581;29802 -659_sleuth_ex1714;2;0.9577368053775003;0.7076235219367734;22118 -659_sleuth_ex1714;3;0.9480302263728545;0.7856830058088728;860 -659_sleuth_ex1714;4;0.93783038009895;-0.03549263497234967;15795 -659_sleuth_ex1714;5;0.9429742878432266;0.6395754889429887;21575 -659_sleuth_ex1714;6;0.9565731658644198;0.7380147538082807;5390 -659_sleuth_ex1714;7;0.9522519738523143;0.34587983063579286;11964 -659_sleuth_ex1714;8;0.9655504465392534;0.697231851315812;6265 -659_sleuth_ex1714;9;0.9674817580761291;0.11417870284199172;23654 -659_sleuth_ex1714;10;0.950989782949168;0.769886517145795;11284 +659_sleuth_ex1714;1;0.9609782379126177;0.7738547024385989;29802 +659_sleuth_ex1714;2;0.969242709954809;0.6401645902839946;22118 +659_sleuth_ex1714;3;0.9643327522367209;0.5176917760048607;860 +659_sleuth_ex1714;4;0.9456569263739503;0.1232782768054056;15795 +659_sleuth_ex1714;5;0.9531173883640386;0.8926118961732403;21575 +659_sleuth_ex1714;6;0.9616041132706629;0.6870470468283217;5390 +659_sleuth_ex1714;7;0.9477206350870038;0.1938125960829673;11964 +659_sleuth_ex1714;8;0.9731525673698604;0.5402913285930655;6265 +659_sleuth_ex1714;9;0.9771698947506777;0.5814105342091516;23654 +659_sleuth_ex1714;10;0.9507390534943635;0.8233435932311819;11284 diff --git a/bench/results/663_rabe_266.csv b/bench/results/663_rabe_266.csv index 2aa7913..96bae95 100644 --- a/bench/results/663_rabe_266.csv +++ b/bench/results/663_rabe_266.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -663_rabe_266;1;0.9963318792951193;0.9944571659647574;29802 -663_rabe_266;2;0.9960449686025733;0.9951265004058345;22118 -663_rabe_266;3;0.9966286261213915;0.9951027571311135;860 -663_rabe_266;4;0.9980283604497546;0.9972051185877722;15795 -663_rabe_266;5;0.9958296721669573;0.9932032595035839;21575 -663_rabe_266;6;0.9956347362733132;0.9927044269697708;5390 -663_rabe_266;7;0.9957878590760463;0.9951744042206477;11964 -663_rabe_266;8;0.9966844579109547;0.9943710863118349;6265 -663_rabe_266;9;0.9963026569250134;0.9961567400987226;23654 -663_rabe_266;10;0.996311236079154;0.9959438870293857;11284 +663_rabe_266;1;0.9963318792951193;0.994457165938687;29802 +663_rabe_266;2;0.9960449686025733;0.9951265004153557;22118 +663_rabe_266;3;0.9963055683249298;0.9935596386998714;860 +663_rabe_266;4;0.9957290303285985;0.9973499220193377;15795 +663_rabe_266;5;0.9958296721669573;0.9932032594851475;21575 +663_rabe_266;6;0.9968077976878242;0.9936874782858696;5390 +663_rabe_266;7;0.9957878590679023;0.9951743564833472;11964 +663_rabe_266;8;0.9966844579109547;0.994371086309436;6265 +663_rabe_266;9;0.9963026317788026;0.9961527987423283;23654 +663_rabe_266;10;0.9961231806109219;0.9966431606874361;11284 diff --git a/bench/results/665_sleuth_case2002.csv b/bench/results/665_sleuth_case2002.csv index a3b1d06..05fb07f 100644 --- a/bench/results/665_sleuth_case2002.csv +++ b/bench/results/665_sleuth_case2002.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -665_sleuth_case2002;1;0.5386482882378207;0.24593876365788836;29802 -665_sleuth_case2002;2;0.5282785192646382;0.2397042172423517;22118 -665_sleuth_case2002;3;0.5580656642185025;0.10463663421433811;860 -665_sleuth_case2002;4;0.549546366604154;0.19442335256825205;15795 -665_sleuth_case2002;5;0.5216823748456061;0.43892903472058775;21575 -665_sleuth_case2002;6;0.5734923225583336;-0.08085715644080982;5390 -665_sleuth_case2002;7;0.5054522322359531;0.30673620899549503;11964 -665_sleuth_case2002;8;0.4895210515104428;0.37179713890081234;6265 -665_sleuth_case2002;9;0.5709392917639837;0.33089812327687984;23654 -665_sleuth_case2002;10;0.5540077609268654;0.2694416446480783;11284 +665_sleuth_case2002;1;0.5492211374383427;0.3194138282629678;29802 +665_sleuth_case2002;2;0.546745989043543;0.21008592168238338;22118 +665_sleuth_case2002;3;0.5762720332288613;0.13946337038133216;860 +665_sleuth_case2002;4;0.5610368593566109;0.1341108952068708;15795 +665_sleuth_case2002;5;0.5492452364910855;0.0683331185031153;21575 +665_sleuth_case2002;6;0.6413743558551446;-0.377276578246055;5390 +665_sleuth_case2002;7;0.5737449858522283;0.22390317995008813;11964 +665_sleuth_case2002;8;0.5274098980367952;0.4262864029398262;6265 +665_sleuth_case2002;9;0.5778163802903606;0.2567767502630014;23654 +665_sleuth_case2002;10;0.5842290081086378;0.24280785807701788;11284 diff --git a/bench/results/666_rmftsa_ladata.csv b/bench/results/666_rmftsa_ladata.csv index 80c9ff2..0e99a61 100644 --- a/bench/results/666_rmftsa_ladata.csv +++ b/bench/results/666_rmftsa_ladata.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -666_rmftsa_ladata;1;0.635795536254633;0.8052730185834623;29802 -666_rmftsa_ladata;2;0.7122424163451371;0.6858247330526992;22118 -666_rmftsa_ladata;3;0.7047980560565774;0.6622455790067168;860 -666_rmftsa_ladata;4;0.6495189621052757;0.7532232508498939;15795 -666_rmftsa_ladata;5;0.6685725509721406;0.5796774291602063;21575 -666_rmftsa_ladata;6;0.6901354461007139;0.6520663656889047;5390 -666_rmftsa_ladata;7;0.6862850333194601;0.6737574687112443;11964 -666_rmftsa_ladata;8;0.6647868770036494;0.681951154525291;6265 -666_rmftsa_ladata;9;0.71057324802021;0.6708551649199037;23654 -666_rmftsa_ladata;10;0.6745481781678346;0.6323073462147712;11284 +666_rmftsa_ladata;1;0.6345104157800793;0.7344866942491635;29802 +666_rmftsa_ladata;2;0.6900416533782323;0.6627937668059906;22118 +666_rmftsa_ladata;3;0.7127135332110708;0.6613380696838017;860 +666_rmftsa_ladata;4;0.6789046076342398;0.6423861112380842;15795 +666_rmftsa_ladata;5;0.6881297780627693;0.7556658746141396;21575 +666_rmftsa_ladata;6;0.6762118758309784;0.6641734127855163;5390 +666_rmftsa_ladata;7;0.6937229663267199;0.7003993423348064;11964 +666_rmftsa_ladata;8;0.6988398491093812;0.7022461102188366;6265 +666_rmftsa_ladata;9;0.7243314580519724;0.6019749041902247;23654 +666_rmftsa_ladata;10;0.709078469774263;0.6597080468600234;11284 diff --git a/bench/results/678_visualizing_environmental.csv b/bench/results/678_visualizing_environmental.csv index 1815f02..4d35654 100644 --- a/bench/results/678_visualizing_environmental.csv +++ b/bench/results/678_visualizing_environmental.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -678_visualizing_environmental;1;0.5884564006241517;-0.2348037422701108;29802 -678_visualizing_environmental;2;0.48415524658057696;0.3842110530561066;22118 -678_visualizing_environmental;3;0.5435252560116295;0.20052013439615268;860 -678_visualizing_environmental;4;0.5299229740381244;0.12194169699888824;15795 -678_visualizing_environmental;5;0.5551504264560185;-0.3712208383149671;21575 -678_visualizing_environmental;6;0.595332876812991;-0.6583139042777237;5390 -678_visualizing_environmental;7;0.5473903713630541;0.4109713932589413;11964 -678_visualizing_environmental;8;0.5404951931756969;0.18650779631586867;6265 -678_visualizing_environmental;9;0.5460068888571925;0.2661496903961954;23654 -678_visualizing_environmental;10;0.4883595042931925;0.29805223307133444;11284 +678_visualizing_environmental;1;0.5972132050982807;-0.2691281646090735;29802 +678_visualizing_environmental;2;0.5451590348898154;0.29711313315317933;22118 +678_visualizing_environmental;3;0.56902138448535;0.23013583078717514;860 +678_visualizing_environmental;4;0.5898295572207202;0.0343088465083462;15795 +678_visualizing_environmental;5;0.574319633056;0.33814077156144795;21575 +678_visualizing_environmental;6;0.5914459084642444;-0.15939534526203247;5390 +678_visualizing_environmental;7;0.591023304454775;0.11941208415804583;11964 +678_visualizing_environmental;8;0.5686600697238989;0.2939150349167159;6265 +678_visualizing_environmental;9;0.57138752562582;0.08965206856588359;23654 +678_visualizing_environmental;10;0.5243190310745487;0.10409164841313134;11284 diff --git a/bench/results/687_sleuth_ex1605.csv b/bench/results/687_sleuth_ex1605.csv index 4923078..c044ad8 100644 --- a/bench/results/687_sleuth_ex1605.csv +++ b/bench/results/687_sleuth_ex1605.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -687_sleuth_ex1605;1;0.8495225517226452;-0.20907073423769873;29802 -687_sleuth_ex1605;2;0.8302934704669824;-0.019767227334987814;22118 -687_sleuth_ex1605;3;0.8554387292430528;-0.5101712102891569;860 -687_sleuth_ex1605;4;0.8851763491357718;-0.07475906449653569;15795 -687_sleuth_ex1605;5;0.8551501509727353;0.4647904504020848;21575 -687_sleuth_ex1605;6;0.82751098311587;0.03904718162701937;5390 -687_sleuth_ex1605;7;0.8497680167066046;-0.6331928938854514;11964 -687_sleuth_ex1605;8;0.8386440179859521;-0.0733182612052583;6265 -687_sleuth_ex1605;9;0.8490793057505749;-0.24405522827672343;23654 -687_sleuth_ex1605;10;0.8136200535218876;0.5532496829644273;11284 +687_sleuth_ex1605;1;0.8718270563850774;-0.6547755175162167;29802 +687_sleuth_ex1605;2;0.8531662032211784;-0.35659349742932767;22118 +687_sleuth_ex1605;3;0.8787897121574618;-0.562926370813355;860 +687_sleuth_ex1605;4;0.8795784345873178;0.22976759448151607;15795 +687_sleuth_ex1605;5;0.8546839224319733;0.14050030292704807;21575 +687_sleuth_ex1605;6;0.8330682490678795;0.16299880612342776;5390 +687_sleuth_ex1605;7;0.864090809061551;-0.011624582933786876;11964 +687_sleuth_ex1605;8;0.88971201597581;0.3297924594025772;6265 +687_sleuth_ex1605;9;0.8424499635366642;0.42024926042429933;23654 +687_sleuth_ex1605;10;0.7744674839492438;0.38155433021038054;11284 diff --git a/bench/results/690_visualizing_galaxy.csv b/bench/results/690_visualizing_galaxy.csv index f83243e..79fc27c 100644 --- a/bench/results/690_visualizing_galaxy.csv +++ b/bench/results/690_visualizing_galaxy.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -690_visualizing_galaxy;1;0.9657924238163759;0.9692267789680069;29802 -690_visualizing_galaxy;2;0.9684304798316039;0.9513042187460473;22118 -690_visualizing_galaxy;3;0.9723687540923115;0.9593230554018062;860 -690_visualizing_galaxy;4;0.9726620914118438;0.9481456106753172;15795 -690_visualizing_galaxy;5;0.968541041961348;0.9652952342395551;21575 -690_visualizing_galaxy;6;0.9666392794448734;0.9753510613379904;5390 -690_visualizing_galaxy;7;0.968136612967248;0.972321626761227;11964 -690_visualizing_galaxy;8;0.9698466434731323;0.9629781529693879;6265 -690_visualizing_galaxy;9;0.9723399319001489;0.9621859070430977;23654 -690_visualizing_galaxy;10;0.9730768455209553;0.9679069669256528;11284 +690_visualizing_galaxy;1;0.9686209956213531;0.9670061231589103;29802 +690_visualizing_galaxy;2;0.9683175555375249;0.9625924665977439;22118 +690_visualizing_galaxy;3;0.9715457930361828;0.9605940090530395;860 +690_visualizing_galaxy;4;0.9719111294690328;0.9435767220139748;15795 +690_visualizing_galaxy;5;0.9730243298381769;0.9703149463405523;21575 +690_visualizing_galaxy;6;0.9617289298869817;0.96884985018978;5390 +690_visualizing_galaxy;7;0.9666790041592244;0.9700733847808086;11964 +690_visualizing_galaxy;8;0.9707683183934963;0.9625978775015374;6265 +690_visualizing_galaxy;9;0.9721491607808469;0.9662219340918892;23654 +690_visualizing_galaxy;10;0.9700803657416998;0.9680733344575125;11284 diff --git a/bench/results/695_chatfield_4.csv b/bench/results/695_chatfield_4.csv index 4c64f26..4d5d272 100644 --- a/bench/results/695_chatfield_4.csv +++ b/bench/results/695_chatfield_4.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -695_chatfield_4;1;0.889338638638973;0.6712876868398956;29802 -695_chatfield_4;2;0.8891385193444622;0.8133318073857972;22118 -695_chatfield_4;3;0.8959330470201513;0.8638920226305525;860 -695_chatfield_4;4;0.9069733951182516;0.8341736360417948;15795 -695_chatfield_4;5;0.8901567134570865;0.8347789240019021;21575 -695_chatfield_4;6;0.9028356641925952;0.8731786004110782;5390 -695_chatfield_4;7;0.911245466966881;0.8127943655466395;11964 -695_chatfield_4;8;0.9014491936361207;0.8046912640022696;6265 -695_chatfield_4;9;0.8997528967063544;0.8274768932184939;23654 -695_chatfield_4;10;0.8723962019509058;0.9389690536101603;11284 +695_chatfield_4;1;0.8941977653741806;0.8883597198197268;29802 +695_chatfield_4;2;0.8882091877186941;0.8739422734487662;22118 +695_chatfield_4;3;0.8894303323652747;0.8609088344955279;860 +695_chatfield_4;4;0.9062329318929632;0.8347461658640825;15795 +695_chatfield_4;5;0.8976470974404709;0.8355215466854498;21575 +695_chatfield_4;6;0.8895321603737939;0.8820638704604157;5390 +695_chatfield_4;7;0.9122410532591531;0.8473693769788438;11964 +695_chatfield_4;8;0.9094944091510064;0.7979131371584133;6265 +695_chatfield_4;9;0.9008635547395554;0.8431840596120737;23654 +695_chatfield_4;10;0.8746773899204544;0.9095994717250626;11284 diff --git a/bench/results/706_sleuth_case1202.csv b/bench/results/706_sleuth_case1202.csv index 80f2d56..d32b28f 100644 --- a/bench/results/706_sleuth_case1202.csv +++ b/bench/results/706_sleuth_case1202.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -706_sleuth_case1202;1;0.8708780145975402;0.5029341366457823;29802 -706_sleuth_case1202;2;0.8702861404017129;-0.714353254344172;22118 -706_sleuth_case1202;3;0.8632509557029328;0.42458447563006685;860 -706_sleuth_case1202;4;0.851712602605018;0.7237397749860872;15795 -706_sleuth_case1202;5;0.8429490129179875;0.41317059547370194;21575 -706_sleuth_case1202;6;0.8531468568283646;0.6333342844981529;5390 -706_sleuth_case1202;7;0.8831230710960768;0.1465440420940225;11964 -706_sleuth_case1202;8;0.8651726914712707;0.7335129112746571;6265 -706_sleuth_case1202;9;0.8742773800571287;0.6741149410469701;23654 -706_sleuth_case1202;10;0.8293451383786524;0.6500547037868788;11284 +706_sleuth_case1202;1;0.872780569380189;0.5933428865833363;29802 +706_sleuth_case1202;2;0.8895487175691678;-0.694852576867028;22118 +706_sleuth_case1202;3;0.8890023819978435;0.7564937379435485;860 +706_sleuth_case1202;4;0.8747620877275443;0.6792072113225716;15795 +706_sleuth_case1202;5;0.8732040788366771;0.5581729635294904;21575 +706_sleuth_case1202;6;0.8711702842978113;0.5872482245860058;5390 +706_sleuth_case1202;7;0.9314755325822177;0.35771697114132417;11964 +706_sleuth_case1202;8;0.8670877618235682;0.6895766946647329;6265 +706_sleuth_case1202;9;0.8873340634211149;0.4157069242564825;23654 +706_sleuth_case1202;10;0.8536867097875782;0.4081863608187326;11284 diff --git a/bench/results/712_chscase_geyser1.csv b/bench/results/712_chscase_geyser1.csv index f743130..bf73b00 100644 --- a/bench/results/712_chscase_geyser1.csv +++ b/bench/results/712_chscase_geyser1.csv @@ -1,11 +1,11 @@ problem;trial;r2_train;r2_test;seed -712_chscase_geyser1;1;0.7705871248788703;0.7980513073160329;29802 -712_chscase_geyser1;2;0.7988591117820882;0.741365218709072;22118 -712_chscase_geyser1;3;0.8163163888854488;0.6996943904013326;860 -712_chscase_geyser1;4;0.7851525852634501;0.8373410230442351;15795 -712_chscase_geyser1;5;0.8003797222022638;0.7397483767337463;21575 -712_chscase_geyser1;6;0.8205183180253184;0.6446513590574385;5390 -712_chscase_geyser1;7;0.7934556678038416;0.7692677908326963;11964 -712_chscase_geyser1;8;0.7999815825779817;0.7498444979754404;6265 -712_chscase_geyser1;9;0.8112813593889564;0.7317810373468396;23654 -712_chscase_geyser1;10;0.7775883935888415;0.8026894753234743;11284 +712_chscase_geyser1;1;0.7773192392477157;0.8123381321666208;29802 +712_chscase_geyser1;2;0.8049561872706668;0.7484151014696171;22118 +712_chscase_geyser1;3;0.8181890338664037;0.7197434595427028;860 +712_chscase_geyser1;4;0.7875954672463915;0.8132289640461307;15795 +712_chscase_geyser1;5;0.8003855990344403;0.7389572396700299;21575 +712_chscase_geyser1;6;0.8182920050431579;0.6468466329800981;5390 +712_chscase_geyser1;7;0.7946654592986703;0.7796703646555044;11964 +712_chscase_geyser1;8;0.8035598158759186;0.7509546960039325;6265 +712_chscase_geyser1;9;0.8308004069799495;0.6946371831894398;23654 +712_chscase_geyser1;10;0.7843964725366162;0.7894675563975533;11284 diff --git a/bench/results/alpinegp-blackbox_results.csv b/bench/results/alpinegp-blackbox_results.csv index 9f65e97..2e1e294 100644 --- a/bench/results/alpinegp-blackbox_results.csv +++ b/bench/results/alpinegp-blackbox_results.csv @@ -1,381 +1,381 @@ dataset,trial,r2_train,r2_zero_test,seed,algorithm -678_visualizing_environmental,1,0.5884564006241517,-0.2348037422701108,29802,AlpineGP -678_visualizing_environmental,2,0.4841552465805769,0.3842110530561066,22118,AlpineGP -678_visualizing_environmental,3,0.5435252560116295,0.2005201343961526,860,AlpineGP -678_visualizing_environmental,4,0.5299229740381244,0.1219416969988882,15795,AlpineGP -678_visualizing_environmental,5,0.5551504264560185,-0.3712208383149671,21575,AlpineGP -678_visualizing_environmental,6,0.595332876812991,-0.6583139042777237,5390,AlpineGP -678_visualizing_environmental,7,0.5473903713630541,0.4109713932589413,11964,AlpineGP -678_visualizing_environmental,8,0.5404951931756969,0.1865077963158686,6265,AlpineGP -678_visualizing_environmental,9,0.5460068888571925,0.2661496903961954,23654,AlpineGP -678_visualizing_environmental,10,0.4883595042931925,0.2980522330713344,11284,AlpineGP -687_sleuth_ex1605,1,0.8495225517226452,-0.2090707342376987,29802,AlpineGP -687_sleuth_ex1605,2,0.8302934704669824,-0.0197672273349878,22118,AlpineGP -687_sleuth_ex1605,3,0.8554387292430528,-0.5101712102891569,860,AlpineGP -687_sleuth_ex1605,4,0.8851763491357718,-0.0747590644965356,15795,AlpineGP -687_sleuth_ex1605,5,0.8551501509727353,0.4647904504020848,21575,AlpineGP -687_sleuth_ex1605,6,0.82751098311587,0.0390471816270193,5390,AlpineGP -687_sleuth_ex1605,7,0.8497680167066046,-0.6331928938854514,11964,AlpineGP -687_sleuth_ex1605,8,0.8386440179859521,-0.0733182612052583,6265,AlpineGP -687_sleuth_ex1605,9,0.8490793057505749,-0.2440552282767234,23654,AlpineGP -687_sleuth_ex1605,10,0.8136200535218876,0.5532496829644273,11284,AlpineGP -659_sleuth_ex1714,1,0.955631483663761,0.8588762691821581,29802,AlpineGP -659_sleuth_ex1714,2,0.9577368053775004,0.7076235219367734,22118,AlpineGP -659_sleuth_ex1714,3,0.9480302263728544,0.7856830058088728,860,AlpineGP -659_sleuth_ex1714,4,0.93783038009895,-0.0354926349723496,15795,AlpineGP -659_sleuth_ex1714,5,0.9429742878432266,0.6395754889429887,21575,AlpineGP -659_sleuth_ex1714,6,0.9565731658644198,0.7380147538082807,5390,AlpineGP -659_sleuth_ex1714,7,0.9522519738523144,0.3458798306357928,11964,AlpineGP -659_sleuth_ex1714,8,0.9655504465392534,0.697231851315812,6265,AlpineGP -659_sleuth_ex1714,9,0.9674817580761292,0.1141787028419917,23654,AlpineGP -659_sleuth_ex1714,10,0.950989782949168,0.769886517145795,11284,AlpineGP -561_cpu,1,0.9889874897385084,0.9875416354431112,29802,AlpineGP -561_cpu,2,0.9924563172967208,0.9396453485837796,22118,AlpineGP -561_cpu,3,0.993059147846632,0.983176359357356,860,AlpineGP -561_cpu,4,0.9893079920516502,0.9888341331548184,15795,AlpineGP -561_cpu,5,0.9935164587177504,0.9505509198754007,21575,AlpineGP -561_cpu,6,0.9955894748121412,0.918160058340207,5390,AlpineGP -561_cpu,7,0.9876294231496052,0.8900879100469693,11964,AlpineGP -561_cpu,8,0.9935519611884656,0.9724056733619404,6265,AlpineGP -561_cpu,9,0.994669136255436,0.9619172509418948,23654,AlpineGP -561_cpu,10,0.9921256198282832,0.98116819311708,11284,AlpineGP -529_pollen,1,0.7936416714066328,0.7762755533706083,29802,AlpineGP -529_pollen,2,0.796441760348724,0.7858222762585879,22118,AlpineGP -529_pollen,3,0.7991947939416362,0.7785509246582917,860,AlpineGP -529_pollen,4,0.7932467920271737,0.796104280090784,15795,AlpineGP -529_pollen,5,0.7842935305604609,0.803699353717669,21575,AlpineGP +678_visualizing_environmental,1,0.5972132050982807,-0.2691281646090735,29802,AlpineGP +678_visualizing_environmental,2,0.5451590348898154,0.2971131331531793,22118,AlpineGP +678_visualizing_environmental,3,0.56902138448535,0.2301358307871751,860,AlpineGP +678_visualizing_environmental,4,0.5898295572207202,0.0343088465083462,15795,AlpineGP +678_visualizing_environmental,5,0.574319633056,0.3381407715614479,21575,AlpineGP +678_visualizing_environmental,6,0.5914459084642444,-0.1593953452620324,5390,AlpineGP +678_visualizing_environmental,7,0.591023304454775,0.1194120841580458,11964,AlpineGP +678_visualizing_environmental,8,0.5686600697238989,0.2939150349167159,6265,AlpineGP +678_visualizing_environmental,9,0.57138752562582,0.0896520685658835,23654,AlpineGP +678_visualizing_environmental,10,0.5243190310745487,0.1040916484131313,11284,AlpineGP +687_sleuth_ex1605,1,0.8718270563850774,-0.6547755175162167,29802,AlpineGP +687_sleuth_ex1605,2,0.8531662032211784,-0.3565934974293276,22118,AlpineGP +687_sleuth_ex1605,3,0.8787897121574618,-0.562926370813355,860,AlpineGP +687_sleuth_ex1605,4,0.8795784345873178,0.229767594481516,15795,AlpineGP +687_sleuth_ex1605,5,0.8546839224319733,0.140500302927048,21575,AlpineGP +687_sleuth_ex1605,6,0.8330682490678795,0.1629988061234277,5390,AlpineGP +687_sleuth_ex1605,7,0.864090809061551,-0.0116245829337868,11964,AlpineGP +687_sleuth_ex1605,8,0.88971201597581,0.3297924594025772,6265,AlpineGP +687_sleuth_ex1605,9,0.8424499635366642,0.4202492604242993,23654,AlpineGP +687_sleuth_ex1605,10,0.7744674839492438,0.3815543302103805,11284,AlpineGP +659_sleuth_ex1714,1,0.9609782379126176,0.7738547024385989,29802,AlpineGP +659_sleuth_ex1714,2,0.969242709954809,0.6401645902839946,22118,AlpineGP +659_sleuth_ex1714,3,0.9643327522367208,0.5176917760048607,860,AlpineGP +659_sleuth_ex1714,4,0.9456569263739504,0.1232782768054056,15795,AlpineGP +659_sleuth_ex1714,5,0.9531173883640386,0.8926118961732403,21575,AlpineGP +659_sleuth_ex1714,6,0.9616041132706628,0.6870470468283217,5390,AlpineGP +659_sleuth_ex1714,7,0.9477206350870038,0.1938125960829673,11964,AlpineGP +659_sleuth_ex1714,8,0.9731525673698604,0.5402913285930655,6265,AlpineGP +659_sleuth_ex1714,9,0.9771698947506776,0.5814105342091516,23654,AlpineGP +659_sleuth_ex1714,10,0.9507390534943636,0.8233435932311819,11284,AlpineGP +561_cpu,1,0.9939434872378528,0.9909580678683556,29802,AlpineGP +561_cpu,2,0.9927063636282192,0.9841059476228268,22118,AlpineGP +561_cpu,3,0.9938280882903348,0.9789681103137008,860,AlpineGP +561_cpu,4,0.9960421650522948,0.9934299494920014,15795,AlpineGP +561_cpu,5,0.9945652852459992,0.9633380989778488,21575,AlpineGP +561_cpu,6,0.989688483747185,0.955603115137449,5390,AlpineGP +561_cpu,7,0.9899779973992744,0.9831773412565956,11964,AlpineGP +561_cpu,8,0.9928190414632616,0.9482278058668812,6265,AlpineGP +561_cpu,9,0.9902802892301434,0.9779337464591602,23654,AlpineGP +561_cpu,10,0.9894021678274892,0.991181823498297,11284,AlpineGP +529_pollen,1,0.7936416714066328,0.7762755533706103,29802,AlpineGP +529_pollen,2,0.7964417603487056,0.7858222739606016,22118,AlpineGP +529_pollen,3,0.7991947939416348,0.7785509261564657,860,AlpineGP +529_pollen,4,0.7932467920271723,0.7961042799605124,15795,AlpineGP +529_pollen,5,0.7842935305604609,0.803699353603365,21575,AlpineGP 529_pollen,6,0.7939167974013728,0.7750304204729883,5390,AlpineGP -529_pollen,7,0.7986415668983129,0.7788932489809886,11964,AlpineGP -529_pollen,8,0.7929956829937825,0.7969014128961736,6265,AlpineGP -529_pollen,9,0.7930964536245204,0.7764647903809114,23654,AlpineGP -529_pollen,10,0.7903534660577021,0.8044480932738134,11284,AlpineGP -503_wind,1,0.7588522944498123,0.7407479193988407,29802,AlpineGP -503_wind,2,0.7396776236929041,0.7682653284848018,22118,AlpineGP -503_wind,3,0.7615843309597021,0.7410864079085691,860,AlpineGP -503_wind,4,0.7486995174805298,0.7419028061049862,15795,AlpineGP -503_wind,5,0.7566183637744479,0.7509473874232699,21575,AlpineGP -503_wind,6,0.7478737982529451,0.744456502771982,5390,AlpineGP -503_wind,7,0.7518204636981045,0.7517045259363225,11964,AlpineGP -503_wind,8,0.7485226285602495,0.7370083668827148,6265,AlpineGP -503_wind,9,0.7499772956159411,0.7494742142933108,23654,AlpineGP -503_wind,10,0.7641226246553331,0.7471169274375125,11284,AlpineGP -1029_LEV,1,0.5879780782214657,0.5204583881052793,29802,AlpineGP -1029_LEV,2,0.5789393164986216,0.5676559936258676,22118,AlpineGP -1029_LEV,3,0.5654880596620051,0.6014544173446551,860,AlpineGP -1029_LEV,4,0.6009750767452274,0.5121487884369427,15795,AlpineGP -1029_LEV,5,0.5820556772161638,0.5630880040408148,21575,AlpineGP -1029_LEV,6,0.579662627460287,0.5580064249328927,5390,AlpineGP -1029_LEV,7,0.5628308404387818,0.6082523494360423,11964,AlpineGP -1029_LEV,8,0.5919320061434583,0.5239553108909693,6265,AlpineGP -1029_LEV,9,0.5869421867148492,0.5407588303901149,23654,AlpineGP -1029_LEV,10,0.5610457958242968,0.5759109213194484,11284,AlpineGP -522_pm10,1,0.3794629609652548,0.2302621747412587,29802,AlpineGP -522_pm10,2,0.3570901484992305,0.1746975453453544,22118,AlpineGP -522_pm10,3,0.3858768939919651,0.2208994195578879,860,AlpineGP -522_pm10,4,0.4076898951263585,0.2175177323761076,15795,AlpineGP -522_pm10,5,0.3697672977729319,0.1711350139037831,21575,AlpineGP -522_pm10,6,0.3428926408914911,0.2687256805922681,5390,AlpineGP -522_pm10,7,0.2977757786654729,0.3227886923695936,11964,AlpineGP -522_pm10,8,0.3817850408338142,0.235955646731105,6265,AlpineGP -522_pm10,9,0.371908834231157,0.2523828215487343,23654,AlpineGP -522_pm10,10,0.3409559374024058,0.256704861294187,11284,AlpineGP -542_pollution,1,0.8843090161282399,0.0560705303880946,29802,AlpineGP -542_pollution,2,0.848075985418739,0.3281553390994669,22118,AlpineGP -542_pollution,3,0.8658478989803343,-0.083211631701771,860,AlpineGP -542_pollution,4,0.8558789844642788,0.4006788617242786,15795,AlpineGP -542_pollution,5,0.8294760057362275,0.4256112830204646,21575,AlpineGP -542_pollution,6,0.8225265855736608,-0.2164999401404657,5390,AlpineGP -542_pollution,7,0.8307172638181622,0.3085715201318613,11964,AlpineGP -542_pollution,8,0.8524654882645428,0.4170939452042327,6265,AlpineGP -542_pollution,9,0.8546598719974106,0.2500854971061002,23654,AlpineGP -542_pollution,10,0.8935182596205669,-0.1856432520588535,11284,AlpineGP -1027_ESL,1,0.8689338256921862,0.8499074519865497,29802,AlpineGP -1027_ESL,2,0.867758019976077,0.8551105370726506,22118,AlpineGP -1027_ESL,3,0.8674199139497978,0.8649631203850897,860,AlpineGP -1027_ESL,4,0.8633000304601383,0.8785615546171678,15795,AlpineGP -1027_ESL,5,0.8698221273060668,0.8600147256664417,21575,AlpineGP -1027_ESL,6,0.8696495006874415,0.8514352495870234,5390,AlpineGP +529_pollen,7,0.798641566896627,0.7788932670570133,11964,AlpineGP +529_pollen,8,0.7929956828151925,0.7969016425934032,6265,AlpineGP +529_pollen,9,0.7930964536245204,0.7764647903809226,23654,AlpineGP +529_pollen,10,0.7903534660554659,0.8044481477208689,11284,AlpineGP +503_wind,1,0.7658257110596496,0.7513438937058385,29802,AlpineGP +503_wind,2,0.7512686937236537,0.7803039547604709,22118,AlpineGP +503_wind,3,0.7685394489029556,0.7539174994433234,860,AlpineGP +503_wind,4,0.7529862535285444,0.7465336965191283,15795,AlpineGP +503_wind,5,0.7543560769127313,0.7466733117882052,21575,AlpineGP +503_wind,6,0.7682431860933203,0.7608158068041664,5390,AlpineGP +503_wind,7,0.7677307665246877,0.7657598529325538,11964,AlpineGP +503_wind,8,0.7545945506267047,0.7440009171727796,6265,AlpineGP +503_wind,9,0.7654443094871098,0.7552019874033794,23654,AlpineGP +503_wind,10,0.7718736265175183,0.7514531599518441,11284,AlpineGP +1029_LEV,1,0.5844889767108543,0.5241174789850113,29802,AlpineGP +1029_LEV,2,0.5755651903150614,0.549427209698127,22118,AlpineGP +1029_LEV,3,0.566429372563344,0.6028440243460665,860,AlpineGP +1029_LEV,4,0.6004406005484593,0.508368018104046,15795,AlpineGP +1029_LEV,5,0.5862117839676109,0.5581402773171422,21575,AlpineGP +1029_LEV,6,0.5816449064797491,0.5569798462643671,5390,AlpineGP +1029_LEV,7,0.5606708615537527,0.6073771024576097,11964,AlpineGP +1029_LEV,8,0.5942136638927331,0.5204119748868217,6265,AlpineGP +1029_LEV,9,0.5866779188098024,0.5442624434496006,23654,AlpineGP +1029_LEV,10,0.5647757142106716,0.6027015429520087,11284,AlpineGP +522_pm10,1,0.3916341590588258,0.1720984810796801,29802,AlpineGP +522_pm10,2,0.3579296162256162,0.3039787110261882,22118,AlpineGP +522_pm10,3,0.4056678696094712,0.1932359191383529,860,AlpineGP +522_pm10,4,0.4328016536255463,0.1816551573427727,15795,AlpineGP +522_pm10,5,0.3645112057778576,0.2521331139314824,21575,AlpineGP +522_pm10,6,0.3837705593480236,0.2991405453755613,5390,AlpineGP +522_pm10,7,0.3616807553785824,0.2292153742058889,11964,AlpineGP +522_pm10,8,0.3929878124918866,0.2154492531880333,6265,AlpineGP +522_pm10,9,0.3805038157810552,0.2684606306177675,23654,AlpineGP +522_pm10,10,0.356206203126046,0.3130639824985683,11284,AlpineGP +542_pollution,1,0.8958123319267871,0.2467419921731796,29802,AlpineGP +542_pollution,2,0.8553987122384332,0.0668106054612284,22118,AlpineGP +542_pollution,3,0.901749416217652,-1.0646079898145016,860,AlpineGP +542_pollution,4,0.8212024550371452,0.5712230275066958,15795,AlpineGP +542_pollution,5,0.8308810750601717,0.4766039910688799,21575,AlpineGP +542_pollution,6,0.859618323819151,-6.3823947444965325,5390,AlpineGP +542_pollution,7,0.852300513461618,0.6079421810351266,11964,AlpineGP +542_pollution,8,0.8868310026363309,0.2580559082696334,6265,AlpineGP +542_pollution,9,0.884318021505661,0.4825525454787056,23654,AlpineGP +542_pollution,10,0.9217287073607584,-0.3652033825026302,11284,AlpineGP +1027_ESL,1,0.8748909209247202,0.8555242762139903,29802,AlpineGP +1027_ESL,2,0.8727993056735073,0.8687545863990538,22118,AlpineGP +1027_ESL,3,0.8655788225418036,0.8922231312905544,860,AlpineGP +1027_ESL,4,0.867303061983018,0.8916465151833094,15795,AlpineGP +1027_ESL,5,0.8736669432607488,0.8273969888641126,21575,AlpineGP +1027_ESL,6,0.8730603777627965,0.8521808175811904,5390,AlpineGP 1027_ESL,7,0.8736255583112233,0.8612788857378055,11964,AlpineGP -1027_ESL,8,0.8693744204394314,0.8642829832699518,6265,AlpineGP -1027_ESL,9,0.8677081484872072,0.8706866048077533,23654,AlpineGP -1027_ESL,10,0.8826240353573471,0.8321416684717158,11284,AlpineGP -1028_SWD,1,0.4197473636309708,0.385911519442995,29802,AlpineGP -1028_SWD,2,0.4517804313326744,0.2690526640849469,22118,AlpineGP -1028_SWD,3,0.4089636958875564,0.4052920591654889,860,AlpineGP -1028_SWD,4,0.4261931320979418,0.344849508584405,15795,AlpineGP -1028_SWD,5,0.4329363035034996,0.293534364945674,21575,AlpineGP -1028_SWD,6,0.4248171541802424,0.3422139044206786,5390,AlpineGP -1028_SWD,7,0.4352997343612451,0.3155602583077638,11964,AlpineGP -1028_SWD,8,0.4510907143308069,0.2416336408729218,6265,AlpineGP -1028_SWD,9,0.411954827512946,0.4035198144230855,23654,AlpineGP -1028_SWD,10,0.418813803643532,0.3540248296575976,11284,AlpineGP -695_chatfield_4,1,0.889338638638973,0.6712876868398956,29802,AlpineGP -695_chatfield_4,2,0.8891385193444622,0.8133318073857972,22118,AlpineGP -695_chatfield_4,3,0.8959330470201513,0.8638920226305525,860,AlpineGP -695_chatfield_4,4,0.9069733951182516,0.8341736360417948,15795,AlpineGP -695_chatfield_4,5,0.8901567134570865,0.8347789240019021,21575,AlpineGP -695_chatfield_4,6,0.9028356641925952,0.8731786004110782,5390,AlpineGP -695_chatfield_4,7,0.911245466966881,0.8127943655466395,11964,AlpineGP -695_chatfield_4,8,0.9014491936361207,0.8046912640022696,6265,AlpineGP -695_chatfield_4,9,0.8997528967063544,0.8274768932184939,23654,AlpineGP -695_chatfield_4,10,0.8723962019509058,0.9389690536101604,11284,AlpineGP -225_puma8NH,1,0.6705324281799827,0.6469760131302198,29802,AlpineGP -225_puma8NH,2,0.6529416250592445,0.6648667830014574,22118,AlpineGP -225_puma8NH,3,0.668872418199597,0.6691440462049962,860,AlpineGP -225_puma8NH,4,0.6698248185101057,0.669578706012151,15795,AlpineGP -225_puma8NH,5,0.6806162807788343,0.6962678620000038,21575,AlpineGP -225_puma8NH,6,0.6557342826799404,0.6791402248205318,5390,AlpineGP -225_puma8NH,7,0.6705031948803878,0.668675527501577,11964,AlpineGP -225_puma8NH,8,0.6696418552266217,0.6668672891980192,6265,AlpineGP -225_puma8NH,9,0.6809417218834979,0.6641877090939536,23654,AlpineGP -225_puma8NH,10,0.6778308230365331,0.6596977025598376,11284,AlpineGP -227_cpu_small,1,0.9540370888709728,0.9609058793159982,29802,AlpineGP -227_cpu_small,2,0.948856268740396,0.9436280757904488,22118,AlpineGP -227_cpu_small,3,0.9504429109686964,0.9520600530975872,860,AlpineGP -227_cpu_small,4,0.9461015247186404,0.9447682989399492,15795,AlpineGP -227_cpu_small,5,0.9533298056263266,0.9529900399919128,21575,AlpineGP -227_cpu_small,6,0.947372523439876,0.9502877717844496,5390,AlpineGP -227_cpu_small,7,0.9482536236025996,0.939552292545259,11964,AlpineGP -227_cpu_small,8,0.9457544827848126,0.9454205410011708,6265,AlpineGP -227_cpu_small,9,0.9455531784001469,0.9339190396781905,23654,AlpineGP -227_cpu_small,10,0.9476693676687382,0.9374238976803586,11284,AlpineGP -229_pwLinear,1,0.8866837244005004,0.8437360911399274,29802,AlpineGP -229_pwLinear,2,0.8651340740201751,0.8154600805428582,22118,AlpineGP -229_pwLinear,3,0.8780570695512598,0.8521667614863495,860,AlpineGP -229_pwLinear,4,0.8692410221184963,0.7966514010453299,15795,AlpineGP -229_pwLinear,5,0.871139368125647,0.8615298223960618,21575,AlpineGP -229_pwLinear,6,0.8476702830738368,0.7375750682294989,5390,AlpineGP -229_pwLinear,7,0.8688776993339421,0.8070841755676031,11964,AlpineGP -229_pwLinear,8,0.8700464875140115,0.8526734480098382,6265,AlpineGP -229_pwLinear,9,0.8761042568651296,0.8079745580972737,23654,AlpineGP -229_pwLinear,10,0.8622494412617301,0.7345898519017362,11284,AlpineGP -712_chscase_geyser1,1,0.7705871248788703,0.7980513073160329,29802,AlpineGP -712_chscase_geyser1,2,0.7988591117820882,0.741365218709072,22118,AlpineGP -712_chscase_geyser1,3,0.8163163888854488,0.6996943904013326,860,AlpineGP -712_chscase_geyser1,4,0.7851525852634501,0.8373410230442351,15795,AlpineGP -712_chscase_geyser1,5,0.8003797222022638,0.7397483767337463,21575,AlpineGP -712_chscase_geyser1,6,0.8205183180253184,0.6446513590574385,5390,AlpineGP -712_chscase_geyser1,7,0.7934556678038416,0.7692677908326963,11964,AlpineGP -712_chscase_geyser1,8,0.7999815825779817,0.7498444979754404,6265,AlpineGP -712_chscase_geyser1,9,0.8112813593889564,0.7317810373468396,23654,AlpineGP -712_chscase_geyser1,10,0.7775883935888415,0.8026894753234743,11284,AlpineGP -547_no2,1,0.5967818808233845,0.5117018659094545,29802,AlpineGP -547_no2,2,0.5918477650269622,0.4852779336584608,22118,AlpineGP -547_no2,3,0.566715054474527,0.5593972629913926,860,AlpineGP -547_no2,4,0.6062002123888404,0.4718711968127075,15795,AlpineGP -547_no2,5,0.5632592952088873,0.5926921185615248,21575,AlpineGP -547_no2,6,0.6071457437094566,0.2855862060309705,5390,AlpineGP -547_no2,7,0.6256525259080508,0.4738794254867927,11964,AlpineGP -547_no2,8,0.5361250034663553,0.6003916764510544,6265,AlpineGP -547_no2,9,0.5667133241764515,0.5811427774008566,23654,AlpineGP -547_no2,10,0.5982345252966745,0.4942635575768614,11284,AlpineGP -1096_FacultySalaries,1,0.9575028902765572,0.8184481267043824,29802,AlpineGP -1096_FacultySalaries,2,0.9568076672378056,0.917434133842035,22118,AlpineGP -1096_FacultySalaries,3,0.9669940054640628,0.8934372879712575,860,AlpineGP -1096_FacultySalaries,4,0.9996757503744214,0.1871532006638934,15795,AlpineGP -1096_FacultySalaries,5,0.9515616447901358,0.8945706203228898,21575,AlpineGP -1096_FacultySalaries,6,0.9981737422612984,-0.6885712121152572,5390,AlpineGP -1096_FacultySalaries,7,0.9795965157536484,0.8997591808890962,11964,AlpineGP -1096_FacultySalaries,8,0.977426037817008,0.8391699685985927,6265,AlpineGP -1096_FacultySalaries,9,0.9684482341572984,0.9618854615626562,23654,AlpineGP -1096_FacultySalaries,10,0.963343554614357,0.8986187469236837,11284,AlpineGP -666_rmftsa_ladata,1,0.635795536254633,0.8052730185834623,29802,AlpineGP -666_rmftsa_ladata,2,0.7122424163451371,0.6858247330526992,22118,AlpineGP -666_rmftsa_ladata,3,0.7047980560565774,0.6622455790067168,860,AlpineGP -666_rmftsa_ladata,4,0.6495189621052757,0.7532232508498939,15795,AlpineGP -666_rmftsa_ladata,5,0.6685725509721406,0.5796774291602063,21575,AlpineGP -666_rmftsa_ladata,6,0.6901354461007139,0.6520663656889047,5390,AlpineGP -666_rmftsa_ladata,7,0.6862850333194601,0.6737574687112443,11964,AlpineGP -666_rmftsa_ladata,8,0.6647868770036494,0.681951154525291,6265,AlpineGP -666_rmftsa_ladata,9,0.71057324802021,0.6708551649199037,23654,AlpineGP -666_rmftsa_ladata,10,0.6745481781678346,0.6323073462147712,11284,AlpineGP -192_vineyard,1,0.8758445698235056,0.2236397944668479,29802,AlpineGP -192_vineyard,2,0.858616424851334,0.132710605259569,22118,AlpineGP -192_vineyard,3,0.8537137523938062,0.4780805511251951,860,AlpineGP -192_vineyard,4,0.8692894908232239,0.3758608681462412,15795,AlpineGP -192_vineyard,5,0.87608336889845,0.4455659090090176,21575,AlpineGP -192_vineyard,6,0.8776088928103271,0.3460864519849878,5390,AlpineGP -192_vineyard,7,0.8384373653693135,0.7195907291058308,11964,AlpineGP -192_vineyard,8,0.8223940400933747,0.0860714281735964,6265,AlpineGP -192_vineyard,9,0.8425098503334608,0.6264516080030693,23654,AlpineGP -192_vineyard,10,0.8929706868907198,0.3844996746903096,11284,AlpineGP -519_vinnie,1,0.778064332036953,0.6748797138263742,29802,AlpineGP -519_vinnie,2,0.7670949723082622,0.7011612077288334,22118,AlpineGP -519_vinnie,3,0.7404644581023311,0.7810698905726365,860,AlpineGP -519_vinnie,4,0.7557205123459001,0.7422420153961651,15795,AlpineGP -519_vinnie,5,0.7713875709908657,0.6900299487722288,21575,AlpineGP -519_vinnie,6,0.771845486331053,0.7108348972477119,5390,AlpineGP -519_vinnie,7,0.7572075674163686,0.7147332995367801,11964,AlpineGP -519_vinnie,8,0.7429626211025838,0.7790843508176507,6265,AlpineGP -519_vinnie,9,0.7490936656635323,0.769534352985476,23654,AlpineGP -519_vinnie,10,0.7635739787535201,0.7251627064505871,11284,AlpineGP -527_analcatdata_election2000,1,0.999144300466917,0.9992466998776212,29802,AlpineGP -527_analcatdata_election2000,2,0.9990232530092272,0.999900107523682,22118,AlpineGP -527_analcatdata_election2000,3,0.999827717483624,0.998066362960046,860,AlpineGP -527_analcatdata_election2000,4,0.9991430940015354,0.9993349896951212,15795,AlpineGP -527_analcatdata_election2000,5,0.9990466772472578,0.99929962636362,21575,AlpineGP -527_analcatdata_election2000,6,0.9997656762303844,0.9945511921408846,5390,AlpineGP -527_analcatdata_election2000,7,0.9991289509891338,0.999578298400491,11964,AlpineGP -527_analcatdata_election2000,8,0.9969921980601382,0.9992001493211,6265,AlpineGP +1027_ESL,8,0.8694653006180479,0.8643633497900758,6265,AlpineGP +1027_ESL,9,0.871058107389737,0.858477048277418,23654,AlpineGP +1027_ESL,10,0.8800564058940745,0.8278137293745264,11284,AlpineGP +1028_SWD,1,0.4373125219729864,0.3591733546209136,29802,AlpineGP +1028_SWD,2,0.4506784905080143,0.2773401843687237,22118,AlpineGP +1028_SWD,3,0.432540658536116,0.396406046001697,860,AlpineGP +1028_SWD,4,0.4383346360723498,0.3553925287309045,15795,AlpineGP +1028_SWD,5,0.4351837209211017,0.3435624926999375,21575,AlpineGP +1028_SWD,6,0.4507777746645741,0.331246467426907,5390,AlpineGP +1028_SWD,7,0.4577456501898753,0.3109047979904143,11964,AlpineGP +1028_SWD,8,0.4623140825381532,0.2911353301464765,6265,AlpineGP +1028_SWD,9,0.41985780125866,0.3945376646322808,23654,AlpineGP +1028_SWD,10,0.4362368631680913,0.335589031365974,11284,AlpineGP +695_chatfield_4,1,0.8941977653741806,0.8883597198197268,29802,AlpineGP +695_chatfield_4,2,0.8882091877186941,0.8739422734487662,22118,AlpineGP +695_chatfield_4,3,0.8894303323652747,0.8609088344955279,860,AlpineGP +695_chatfield_4,4,0.9062329318929632,0.8347461658640825,15795,AlpineGP +695_chatfield_4,5,0.8976470974404709,0.8355215466854498,21575,AlpineGP +695_chatfield_4,6,0.8895321603737939,0.8820638704604157,5390,AlpineGP +695_chatfield_4,7,0.9122410532591532,0.8473693769788438,11964,AlpineGP +695_chatfield_4,8,0.9094944091510064,0.7979131371584133,6265,AlpineGP +695_chatfield_4,9,0.9008635547395554,0.8431840596120737,23654,AlpineGP +695_chatfield_4,10,0.8746773899204544,0.9095994717250626,11284,AlpineGP +225_puma8NH,1,0.682850204435814,0.6789211747963135,29802,AlpineGP +225_puma8NH,2,0.6754836444737324,0.6930220241212716,22118,AlpineGP +225_puma8NH,3,0.6761584240959573,0.6747595339118583,860,AlpineGP +225_puma8NH,4,0.6685405891987423,0.6638774570350612,15795,AlpineGP +225_puma8NH,5,0.6778781214915794,0.692070342886109,21575,AlpineGP +225_puma8NH,6,0.6753263424574281,0.6922572411975378,5390,AlpineGP +225_puma8NH,7,0.6209830017302066,0.6273467897767163,11964,AlpineGP +225_puma8NH,8,0.6777014266663415,0.6799644712287625,6265,AlpineGP +225_puma8NH,9,0.6822693621746627,0.666476328548008,23654,AlpineGP +225_puma8NH,10,0.6814646567138007,0.6671700746406766,11284,AlpineGP +227_cpu_small,1,0.951145941207256,0.9560957918072144,29802,AlpineGP +227_cpu_small,2,0.9567873549993458,0.9540203418129014,22118,AlpineGP +227_cpu_small,3,0.953209617799769,0.952961719048116,860,AlpineGP +227_cpu_small,4,0.9547184045361404,0.9564869218621034,15795,AlpineGP +227_cpu_small,5,0.9522737226764634,0.9530777748357576,21575,AlpineGP +227_cpu_small,6,0.9496651602539978,0.9526532237472574,5390,AlpineGP +227_cpu_small,7,0.9480917341278418,0.9394283620886285,11964,AlpineGP +227_cpu_small,8,0.9569652130042856,0.957357703480119,6265,AlpineGP +227_cpu_small,9,0.9578391420110456,0.9528086472247772,23654,AlpineGP +227_cpu_small,10,0.940840748921228,0.9244148967255909,11284,AlpineGP +229_pwLinear,1,0.887097489437163,0.8468543043562392,29802,AlpineGP +229_pwLinear,2,0.8968688038944086,0.8903898549781959,22118,AlpineGP +229_pwLinear,3,0.898360896148988,0.8861303015560574,860,AlpineGP +229_pwLinear,4,0.9083144011127212,0.8661698780989783,15795,AlpineGP +229_pwLinear,5,0.910826924190414,0.8685153004743829,21575,AlpineGP +229_pwLinear,6,0.8967604755237106,0.8618639847110794,5390,AlpineGP +229_pwLinear,7,0.908072954770001,0.8262182908899607,11964,AlpineGP +229_pwLinear,8,0.9131158810203344,0.8696165164408076,6265,AlpineGP +229_pwLinear,9,0.8940989954069053,0.8593211470909712,23654,AlpineGP +229_pwLinear,10,0.8996144256003755,0.7904577840031168,11284,AlpineGP +712_chscase_geyser1,1,0.7773192392477157,0.8123381321666208,29802,AlpineGP +712_chscase_geyser1,2,0.8049561872706668,0.7484151014696171,22118,AlpineGP +712_chscase_geyser1,3,0.8181890338664037,0.7197434595427028,860,AlpineGP +712_chscase_geyser1,4,0.7875954672463915,0.8132289640461307,15795,AlpineGP +712_chscase_geyser1,5,0.8003855990344403,0.7389572396700299,21575,AlpineGP +712_chscase_geyser1,6,0.8182920050431579,0.6468466329800981,5390,AlpineGP +712_chscase_geyser1,7,0.7946654592986703,0.7796703646555044,11964,AlpineGP +712_chscase_geyser1,8,0.8035598158759186,0.7509546960039325,6265,AlpineGP +712_chscase_geyser1,9,0.8308004069799495,0.6946371831894398,23654,AlpineGP +712_chscase_geyser1,10,0.7843964725366162,0.7894675563975533,11284,AlpineGP +547_no2,1,0.6027579041070649,0.4365809915545194,29802,AlpineGP +547_no2,2,0.6263829279922758,0.3742690727090351,22118,AlpineGP +547_no2,3,0.5773914505135069,0.5688552557706618,860,AlpineGP +547_no2,4,0.623561868307309,0.3786968263091799,15795,AlpineGP +547_no2,5,0.5708335374653255,0.6944555489309868,21575,AlpineGP +547_no2,6,0.6333658568772302,0.3930638879362218,5390,AlpineGP +547_no2,7,0.606079323420359,0.4181091331625227,11964,AlpineGP +547_no2,8,0.5933405445421667,0.5525235957536274,6265,AlpineGP +547_no2,9,0.5797021946144171,0.4658611076735708,23654,AlpineGP +547_no2,10,0.6157384872219682,0.4557987023033202,11284,AlpineGP +1096_FacultySalaries,1,0.9757811801086492,0.8468690082448429,29802,AlpineGP +1096_FacultySalaries,2,0.9747072184400876,0.5559189268086008,22118,AlpineGP +1096_FacultySalaries,3,0.9602495975861516,0.6315686809951979,860,AlpineGP +1096_FacultySalaries,4,0.9996757503744214,0.187153199478345,15795,AlpineGP +1096_FacultySalaries,5,0.9733547803551156,0.9447964356602097,21575,AlpineGP +1096_FacultySalaries,6,0.998173742238267,-0.6885706555756188,5390,AlpineGP +1096_FacultySalaries,7,0.9787889343295242,0.9232090362546188,11964,AlpineGP +1096_FacultySalaries,8,0.9648100352407598,0.8257600154636463,6265,AlpineGP +1096_FacultySalaries,9,0.9792443170894076,0.960647215075818,23654,AlpineGP +1096_FacultySalaries,10,0.9813331788413356,0.8844476997285342,11284,AlpineGP +666_rmftsa_ladata,1,0.6345104157800793,0.7344866942491635,29802,AlpineGP +666_rmftsa_ladata,2,0.6900416533782323,0.6627937668059906,22118,AlpineGP +666_rmftsa_ladata,3,0.7127135332110708,0.6613380696838017,860,AlpineGP +666_rmftsa_ladata,4,0.6789046076342398,0.6423861112380842,15795,AlpineGP +666_rmftsa_ladata,5,0.6881297780627693,0.7556658746141396,21575,AlpineGP +666_rmftsa_ladata,6,0.6762118758309784,0.6641734127855163,5390,AlpineGP +666_rmftsa_ladata,7,0.6937229663267199,0.7003993423348064,11964,AlpineGP +666_rmftsa_ladata,8,0.6988398491093812,0.7022461102188366,6265,AlpineGP +666_rmftsa_ladata,9,0.7243314580519724,0.6019749041902247,23654,AlpineGP +666_rmftsa_ladata,10,0.709078469774263,0.6597080468600234,11284,AlpineGP +192_vineyard,1,0.8708853934183917,-0.4248209450726521,29802,AlpineGP +192_vineyard,2,0.8569606516417794,-0.4289241524650009,22118,AlpineGP +192_vineyard,3,0.8829022683683329,0.0949843343295614,860,AlpineGP +192_vineyard,4,0.8956247954424682,0.1567155619447031,15795,AlpineGP +192_vineyard,5,0.8912368002572013,0.2222560688178626,21575,AlpineGP +192_vineyard,6,0.8829681580907572,0.2039008023908344,5390,AlpineGP +192_vineyard,7,0.8457254813152156,0.4791450796237647,11964,AlpineGP +192_vineyard,8,0.8352999677222162,0.5832638534915283,6265,AlpineGP +192_vineyard,9,0.8619800115440981,0.5736448841534942,23654,AlpineGP +192_vineyard,10,0.8912822500084658,0.5882196088719382,11284,AlpineGP +519_vinnie,1,0.778064332036953,0.6748797138294724,29802,AlpineGP +519_vinnie,2,0.7670949723082623,0.7011612082750185,22118,AlpineGP +519_vinnie,3,0.7404644581023311,0.7810698905254343,860,AlpineGP +519_vinnie,4,0.7557205123459001,0.7422420153780349,15795,AlpineGP +519_vinnie,5,0.7753683470514572,0.6823505255947264,21575,AlpineGP +519_vinnie,6,0.767887083813139,0.712071518327847,5390,AlpineGP +519_vinnie,7,0.7572075674163686,0.7147332995223332,11964,AlpineGP +519_vinnie,8,0.7542153901449076,0.7587488135901574,6265,AlpineGP +519_vinnie,9,0.7467373888739486,0.7694240036664656,23654,AlpineGP +519_vinnie,10,0.7654651757060934,0.709533343161777,11284,AlpineGP +527_analcatdata_election2000,1,0.999144300466917,0.9992466998829244,29802,AlpineGP +527_analcatdata_election2000,2,0.9990232530092272,0.9999001075222042,22118,AlpineGP +527_analcatdata_election2000,3,0.999827717483624,0.9980663629616184,860,AlpineGP +527_analcatdata_election2000,4,0.998392150905119,0.9884068132502254,15795,AlpineGP +527_analcatdata_election2000,5,0.997711501919464,0.9809805075038192,21575,AlpineGP +527_analcatdata_election2000,6,0.9997656762303844,0.994551192140566,5390,AlpineGP +527_analcatdata_election2000,7,0.9977647930464764,0.995617920490958,11964,AlpineGP +527_analcatdata_election2000,8,0.996513739803008,0.990214315771051,6265,AlpineGP 527_analcatdata_election2000,9,0.9972831883429464,0.9885665336066448,23654,AlpineGP -527_analcatdata_election2000,10,0.9991493426875012,0.9995293788943556,11284,AlpineGP -706_sleuth_case1202,1,0.8708780145975402,0.5029341366457823,29802,AlpineGP -706_sleuth_case1202,2,0.8702861404017129,-0.714353254344172,22118,AlpineGP -706_sleuth_case1202,3,0.8632509557029328,0.4245844756300668,860,AlpineGP -706_sleuth_case1202,4,0.851712602605018,0.7237397749860872,15795,AlpineGP -706_sleuth_case1202,5,0.8429490129179875,0.4131705954737019,21575,AlpineGP -706_sleuth_case1202,6,0.8531468568283646,0.6333342844981529,5390,AlpineGP -706_sleuth_case1202,7,0.8831230710960768,0.1465440420940225,11964,AlpineGP -706_sleuth_case1202,8,0.8651726914712707,0.7335129112746571,6265,AlpineGP -706_sleuth_case1202,9,0.8742773800571287,0.6741149410469701,23654,AlpineGP -706_sleuth_case1202,10,0.8293451383786524,0.6500547037868788,11284,AlpineGP +527_analcatdata_election2000,10,0.9926124817775728,0.9874342630385744,11284,AlpineGP +706_sleuth_case1202,1,0.872780569380189,0.5933428865833363,29802,AlpineGP +706_sleuth_case1202,2,0.8895487175691678,-0.694852576867028,22118,AlpineGP +706_sleuth_case1202,3,0.8890023819978435,0.7564937379435485,860,AlpineGP +706_sleuth_case1202,4,0.8747620877275443,0.6792072113225716,15795,AlpineGP +706_sleuth_case1202,5,0.8732040788366771,0.5581729635294904,21575,AlpineGP +706_sleuth_case1202,6,0.8711702842978113,0.5872482245860058,5390,AlpineGP +706_sleuth_case1202,7,0.9314755325822176,0.3577169711413241,11964,AlpineGP +706_sleuth_case1202,8,0.8670877618235682,0.6895766946647329,6265,AlpineGP +706_sleuth_case1202,9,0.8873340634211149,0.4157069242564825,23654,AlpineGP +706_sleuth_case1202,10,0.8536867097875782,0.4081863608187326,11284,AlpineGP 523_analcatdata_neavote,1,0.9582300334612134,0.938671430359146,29802,AlpineGP -523_analcatdata_neavote,2,0.9638237885042876,0.915209557678332,22118,AlpineGP -523_analcatdata_neavote,3,0.9637411693056734,0.911374339211446,860,AlpineGP -523_analcatdata_neavote,4,0.9536128203238624,0.9484574870437464,15795,AlpineGP -523_analcatdata_neavote,5,0.9528400818699696,0.9555480753370238,21575,AlpineGP +523_analcatdata_neavote,2,0.9638237885042876,0.9152095576795444,22118,AlpineGP +523_analcatdata_neavote,3,0.9637411693056734,0.9113743393154916,860,AlpineGP +523_analcatdata_neavote,4,0.9536128203238624,0.9484574870125132,15795,AlpineGP +523_analcatdata_neavote,5,0.9528400818699696,0.9555480753168316,21575,AlpineGP 523_analcatdata_neavote,6,0.9469477274013106,0.9694100916316684,5390,AlpineGP -523_analcatdata_neavote,7,0.9656529293299564,0.903244449581866,11964,AlpineGP +523_analcatdata_neavote,7,0.9656529293299564,0.9032444495722484,11964,AlpineGP 523_analcatdata_neavote,8,0.9475894369194252,0.8977943324063037,6265,AlpineGP -523_analcatdata_neavote,9,0.9534332217559968,0.951120289477556,23654,AlpineGP -523_analcatdata_neavote,10,0.9466394799975568,0.9749431974558286,11284,AlpineGP -560_bodyfat,1,0.9902453470772457,0.9908636724386908,29802,AlpineGP -560_bodyfat,2,0.9879227731101468,0.997295801972364,22118,AlpineGP -560_bodyfat,3,0.9877795170598372,0.9647598579980914,860,AlpineGP -560_bodyfat,4,0.9933388192517554,0.9798362796285576,15795,AlpineGP -560_bodyfat,5,0.9884082345394184,0.9987709661807213,21575,AlpineGP -560_bodyfat,6,0.9853465377818014,0.9717951838022242,5390,AlpineGP -560_bodyfat,7,0.9898225525524794,0.9904450422634472,11964,AlpineGP -560_bodyfat,8,0.9872747781111896,0.9985958897705474,6265,AlpineGP -560_bodyfat,9,0.99069869274096,0.9972905797968374,23654,AlpineGP -560_bodyfat,10,0.9894082531733412,0.9950128265303042,11284,AlpineGP -1030_ERA,1,0.355946366178447,0.4513087608344157,29802,AlpineGP -1030_ERA,2,0.3729009382846516,0.4120055153836605,22118,AlpineGP -1030_ERA,3,0.3705570168279426,0.4195764121745853,860,AlpineGP -1030_ERA,4,0.3883474245311928,0.3812232618906924,15795,AlpineGP -1030_ERA,5,0.3890787660989249,0.3897748494190886,21575,AlpineGP -1030_ERA,6,0.3979072700512892,0.3652092964276171,5390,AlpineGP -1030_ERA,7,0.3968620953243706,0.3497423692892245,11964,AlpineGP -1030_ERA,8,0.4067885941179011,0.3181438758547575,6265,AlpineGP -1030_ERA,9,0.3923063405726871,0.3400834590518873,23654,AlpineGP -1030_ERA,10,0.405026041012461,0.3124839043779347,11284,AlpineGP -485_analcatdata_vehicle,1,0.9528804885155496,0.1246252991534849,29802,AlpineGP -485_analcatdata_vehicle,2,0.9193082516437284,0.7750424433097705,22118,AlpineGP -485_analcatdata_vehicle,3,0.9435440299115576,0.2868891930248415,860,AlpineGP -485_analcatdata_vehicle,4,0.926441787803414,-0.3336271372677764,15795,AlpineGP -485_analcatdata_vehicle,5,0.9213890151626498,0.643247847518012,21575,AlpineGP -485_analcatdata_vehicle,6,0.9566428252568012,-1.4892996326257548,5390,AlpineGP -485_analcatdata_vehicle,7,0.9272197383492176,0.346071719920654,11964,AlpineGP -485_analcatdata_vehicle,8,0.9510147018118772,0.7884736384082681,6265,AlpineGP -485_analcatdata_vehicle,9,0.9204605428326906,0.7038232035808454,23654,AlpineGP -485_analcatdata_vehicle,10,0.9082079384379624,0.5955875525497412,11284,AlpineGP -505_tecator,1,0.9854549276326078,0.9824194712517073,29802,AlpineGP -505_tecator,2,0.9900042822567748,0.9888052939802175,22118,AlpineGP -505_tecator,3,0.9856845643580412,0.9870546490414864,860,AlpineGP -505_tecator,4,0.9908573959466794,0.9856740255484958,15795,AlpineGP -505_tecator,5,0.9846678490714278,0.9845605298374048,21575,AlpineGP -505_tecator,6,0.9890515021634928,0.9919294021697468,5390,AlpineGP -505_tecator,7,0.9875095460959086,0.99457541711821,11964,AlpineGP -505_tecator,8,0.9909905285623748,0.9863775705197316,6265,AlpineGP -505_tecator,9,0.9849433268275736,0.9838444019508767,23654,AlpineGP -505_tecator,10,0.9850237224054832,0.983372108736585,11284,AlpineGP -556_analcatdata_apnea2,1,0.8900070301834665,0.8595879221942337,29802,AlpineGP -556_analcatdata_apnea2,2,0.8758624694790041,0.88941288735778,22118,AlpineGP -556_analcatdata_apnea2,3,0.8665487309615658,0.8581717391435798,860,AlpineGP -556_analcatdata_apnea2,4,0.8886758268480706,0.893920655303757,15795,AlpineGP -556_analcatdata_apnea2,5,0.916239769262128,0.8339633597457752,21575,AlpineGP -556_analcatdata_apnea2,6,0.867878903871961,0.9060204818046558,5390,AlpineGP -556_analcatdata_apnea2,7,0.895186942418516,0.8318367154488906,11964,AlpineGP -556_analcatdata_apnea2,8,0.8825170446454766,0.8747090101308928,6265,AlpineGP -556_analcatdata_apnea2,9,0.8735327174323235,0.8894505601989673,23654,AlpineGP -556_analcatdata_apnea2,10,0.8891486534240983,0.794500452106343,11284,AlpineGP -690_visualizing_galaxy,1,0.965792423816376,0.9692267789680068,29802,AlpineGP -690_visualizing_galaxy,2,0.968430479831604,0.9513042187460472,22118,AlpineGP -690_visualizing_galaxy,3,0.9723687540923116,0.9593230554018062,860,AlpineGP -690_visualizing_galaxy,4,0.9726620914118438,0.9481456106753172,15795,AlpineGP -690_visualizing_galaxy,5,0.968541041961348,0.9652952342395552,21575,AlpineGP -690_visualizing_galaxy,6,0.9666392794448734,0.9753510613379904,5390,AlpineGP -690_visualizing_galaxy,7,0.968136612967248,0.972321626761227,11964,AlpineGP -690_visualizing_galaxy,8,0.9698466434731324,0.962978152969388,6265,AlpineGP -690_visualizing_galaxy,9,0.9723399319001488,0.9621859070430976,23654,AlpineGP -690_visualizing_galaxy,10,0.9730768455209552,0.9679069669256528,11284,AlpineGP -663_rabe_266,1,0.9963318792951192,0.9944571659647574,29802,AlpineGP -663_rabe_266,2,0.9960449686025732,0.9951265004058344,22118,AlpineGP -663_rabe_266,3,0.9966286261213916,0.9951027571311136,860,AlpineGP -663_rabe_266,4,0.9980283604497546,0.9972051185877722,15795,AlpineGP -663_rabe_266,5,0.9958296721669572,0.993203259503584,21575,AlpineGP -663_rabe_266,6,0.9956347362733132,0.9927044269697708,5390,AlpineGP -663_rabe_266,7,0.9957878590760464,0.9951744042206476,11964,AlpineGP -663_rabe_266,8,0.9966844579109548,0.9943710863118348,6265,AlpineGP -663_rabe_266,9,0.9963026569250134,0.9961567400987226,23654,AlpineGP -663_rabe_266,10,0.996311236079154,0.9959438870293856,11284,AlpineGP -557_analcatdata_apnea1,1,0.8784986043605206,0.8935157686701525,29802,AlpineGP -557_analcatdata_apnea1,2,0.8805194148319184,0.8854764949913403,22118,AlpineGP -557_analcatdata_apnea1,3,0.8946427063893206,0.9266201317649982,860,AlpineGP -557_analcatdata_apnea1,4,0.8947701057761045,0.912149951414244,15795,AlpineGP -557_analcatdata_apnea1,5,0.9005877753632446,0.8576382929684554,21575,AlpineGP -557_analcatdata_apnea1,6,0.8807628306155243,0.8681637272730844,5390,AlpineGP -557_analcatdata_apnea1,7,0.8856828797455032,0.911666482712262,11964,AlpineGP -557_analcatdata_apnea1,8,0.9026977882337626,0.8330659053504004,6265,AlpineGP -557_analcatdata_apnea1,9,0.9039998899807844,0.8065397993549838,23654,AlpineGP -557_analcatdata_apnea1,10,0.8767152319664884,0.9193210372003124,11284,AlpineGP -197_cpu_act,1,0.938793489951424,0.945110632216004,29802,AlpineGP -197_cpu_act,2,0.933363961522426,0.9290571513295528,22118,AlpineGP -197_cpu_act,3,0.9530034270392485,0.9503027779498951,860,AlpineGP -197_cpu_act,4,0.947040177521454,0.9451628075750368,15795,AlpineGP -197_cpu_act,5,0.9427855190899836,0.9461699935502532,21575,AlpineGP -197_cpu_act,6,0.9471825707278558,0.950616669956796,5390,AlpineGP -197_cpu_act,7,0.9564062751447068,0.9520205277468108,11964,AlpineGP -197_cpu_act,8,0.9540432896569564,0.951202869992568,6265,AlpineGP -197_cpu_act,9,0.938287719773428,0.9361704125648528,23654,AlpineGP -197_cpu_act,10,0.9374270754243184,0.92509025355858,11284,AlpineGP -665_sleuth_case2002,1,0.5386482882378207,0.2459387636578883,29802,AlpineGP -665_sleuth_case2002,2,0.5282785192646382,0.2397042172423517,22118,AlpineGP -665_sleuth_case2002,3,0.5580656642185025,0.1046366342143381,860,AlpineGP -665_sleuth_case2002,4,0.549546366604154,0.194423352568252,15795,AlpineGP -665_sleuth_case2002,5,0.5216823748456061,0.4389290347205877,21575,AlpineGP -665_sleuth_case2002,6,0.5734923225583336,-0.0808571564408098,5390,AlpineGP -665_sleuth_case2002,7,0.5054522322359531,0.306736208995495,11964,AlpineGP -665_sleuth_case2002,8,0.4895210515104428,0.3717971389008123,6265,AlpineGP -665_sleuth_case2002,9,0.5709392917639837,0.3308981232768798,23654,AlpineGP -665_sleuth_case2002,10,0.5540077609268654,0.2694416446480783,11284,AlpineGP -210_cloud,1,0.9247636146137266,0.6455604410596569,29802,AlpineGP -210_cloud,2,0.934380853356702,0.8770082489331003,22118,AlpineGP -210_cloud,3,0.9311210811645868,0.7127490256169635,860,AlpineGP -210_cloud,4,0.934936620286743,0.9362826105251004,15795,AlpineGP -210_cloud,5,0.948292013480104,0.908272207594744,21575,AlpineGP -210_cloud,6,0.942302095803146,0.7227604098373297,5390,AlpineGP -210_cloud,7,0.9505642703201788,0.8504613720142944,11964,AlpineGP -210_cloud,8,0.9360740181207674,0.6395164814517422,6265,AlpineGP -210_cloud,9,0.9298362563595642,0.8894142340721412,23654,AlpineGP -210_cloud,10,0.9517720645617664,0.4347534454954694,11284,AlpineGP -1089_USCrime,1,0.9484139509443628,0.8493853523348904,29802,AlpineGP -1089_USCrime,2,0.9572058470347506,0.81935543633297,22118,AlpineGP -1089_USCrime,3,0.9520102598873852,0.4540434382838121,860,AlpineGP -1089_USCrime,4,0.9513053045283996,0.74447123868373,15795,AlpineGP -1089_USCrime,5,0.9418779030982184,0.7130163733303321,21575,AlpineGP -1089_USCrime,6,0.943966044212423,0.8377965052540635,5390,AlpineGP -1089_USCrime,7,0.9583551955947374,0.7145147539996739,11964,AlpineGP -1089_USCrime,8,0.9563572028892028,0.7684135499103156,6265,AlpineGP -1089_USCrime,9,0.9507182479552772,0.66950062207029,23654,AlpineGP -1089_USCrime,10,0.9449242331234132,0.8216779253190278,11284,AlpineGP -230_machine_cpu,1,0.9480533853347944,0.8992187802149694,29802,AlpineGP -230_machine_cpu,2,0.9522645085936048,0.9225522264452864,22118,AlpineGP -230_machine_cpu,3,0.9470518003228452,0.8981659347479933,860,AlpineGP -230_machine_cpu,4,0.95506023413372,0.8888056875723519,15795,AlpineGP -230_machine_cpu,5,0.9385079910061536,0.7774941119015837,21575,AlpineGP -230_machine_cpu,6,0.938020894607794,0.90727103345548,5390,AlpineGP -230_machine_cpu,7,0.9684008083925838,0.0132143468904539,11964,AlpineGP -230_machine_cpu,8,0.9450825586427528,0.7851694917284495,6265,AlpineGP -230_machine_cpu,9,0.95029831052323,0.8705442088244373,23654,AlpineGP -230_machine_cpu,10,0.949887276067157,0.8269905463822742,11284,AlpineGP -228_elusage,1,0.9135086327699564,0.3750456939381211,29802,AlpineGP -228_elusage,2,0.8970128288923932,0.3510377821339203,22118,AlpineGP -228_elusage,3,0.8886296814417715,0.7196904542426055,860,AlpineGP -228_elusage,4,0.8704194422338686,0.6653540966273213,15795,AlpineGP -228_elusage,5,0.8917017093876557,0.8306453559482805,21575,AlpineGP +523_analcatdata_neavote,9,0.9534332217559968,0.9511202894395492,23654,AlpineGP +523_analcatdata_neavote,10,0.9466394799975568,0.9749431974411712,11284,AlpineGP +560_bodyfat,1,0.9892294726831696,0.9905519230178036,29802,AlpineGP +560_bodyfat,2,0.989569996973957,-1.0023905305340368,22118,AlpineGP +560_bodyfat,3,0.9890353622478568,0.0236264505833307,860,AlpineGP +560_bodyfat,4,0.9952749801955,0.9797754559364098,15795,AlpineGP +560_bodyfat,5,0.9895968122812512,0.9468185929193084,21575,AlpineGP +560_bodyfat,6,0.9893111554082744,0.9932278402428694,5390,AlpineGP +560_bodyfat,7,0.9919130376162124,0.9615800934213272,11964,AlpineGP +560_bodyfat,8,0.987453072407826,0.9974674810236528,6265,AlpineGP +560_bodyfat,9,0.9896179946304045,0.967619761736427,23654,AlpineGP +560_bodyfat,10,0.9885884513447456,0.0961690570399333,11284,AlpineGP +1030_ERA,1,0.3682264585639765,0.4500037018895125,29802,AlpineGP +1030_ERA,2,0.3703982945293156,0.4484664334429165,22118,AlpineGP +1030_ERA,3,0.3694747321551356,0.4318374262567399,860,AlpineGP +1030_ERA,4,0.3890483637162687,0.3854357692512782,15795,AlpineGP +1030_ERA,5,0.384442345637877,0.3924311742585977,21575,AlpineGP +1030_ERA,6,0.3980002891384971,0.3775160066188339,5390,AlpineGP +1030_ERA,7,0.3955378429725021,0.370361659063548,11964,AlpineGP +1030_ERA,8,0.4050065063730373,0.3288319726806165,6265,AlpineGP +1030_ERA,9,0.3989662123825213,0.3501745992492785,23654,AlpineGP +1030_ERA,10,0.3986027031981597,0.3161933285379288,11284,AlpineGP +485_analcatdata_vehicle,1,0.9634778391300124,-0.1193519601801076,29802,AlpineGP +485_analcatdata_vehicle,2,0.9166635964968202,0.738115759107596,22118,AlpineGP +485_analcatdata_vehicle,3,0.9581223359446156,0.2249038831186386,860,AlpineGP +485_analcatdata_vehicle,4,0.9434113587122004,-0.0900263120546409,15795,AlpineGP +485_analcatdata_vehicle,5,0.9344680302958008,0.4562061291302909,21575,AlpineGP +485_analcatdata_vehicle,6,0.966115635104376,-1.54536586208844,5390,AlpineGP +485_analcatdata_vehicle,7,0.9243469464676076,-0.1349344390199267,11964,AlpineGP +485_analcatdata_vehicle,8,0.96242260650482,0.3837745945321393,6265,AlpineGP +485_analcatdata_vehicle,9,0.907431139448404,0.7667116771104026,23654,AlpineGP +485_analcatdata_vehicle,10,0.9531144867120844,0.944325808202833,11284,AlpineGP +505_tecator,1,0.9927454620736508,0.989305387998754,29802,AlpineGP +505_tecator,2,0.987932932834178,0.9875747440972732,22118,AlpineGP +505_tecator,3,0.9880041142690136,0.99084316226501,860,AlpineGP +505_tecator,4,0.9865817685899064,0.9733492090347696,15795,AlpineGP +505_tecator,5,0.9813563895788476,0.9818628960287504,21575,AlpineGP +505_tecator,6,0.987308704263958,0.9830617334029936,5390,AlpineGP +505_tecator,7,0.9871679365587726,0.9925495243949886,11964,AlpineGP +505_tecator,8,0.9924076490023038,0.9880707404120648,6265,AlpineGP +505_tecator,9,0.9823654866433312,0.9796968419455596,23654,AlpineGP +505_tecator,10,0.9889690723394612,0.9900780662217898,11284,AlpineGP +556_analcatdata_apnea2,1,0.9061466197187624,0.8446457521446504,29802,AlpineGP +556_analcatdata_apnea2,2,0.9016054699918508,0.8766708937802739,22118,AlpineGP +556_analcatdata_apnea2,3,0.8843539559161282,0.8775424432511525,860,AlpineGP +556_analcatdata_apnea2,4,0.8952595763020793,0.8625884790785422,15795,AlpineGP +556_analcatdata_apnea2,5,0.9136214810272898,0.8302649665981329,21575,AlpineGP +556_analcatdata_apnea2,6,0.8738571786519445,0.8973438896474054,5390,AlpineGP +556_analcatdata_apnea2,7,0.9354432219431368,0.8626424210401881,11964,AlpineGP +556_analcatdata_apnea2,8,0.8925069835747588,0.87321111585474,6265,AlpineGP +556_analcatdata_apnea2,9,0.9081902622651676,0.8788416431394969,23654,AlpineGP +556_analcatdata_apnea2,10,0.8992562268602847,0.8318869294207427,11284,AlpineGP +690_visualizing_galaxy,1,0.9686209956213532,0.9670061231589104,29802,AlpineGP +690_visualizing_galaxy,2,0.9683175555375249,0.962592466597744,22118,AlpineGP +690_visualizing_galaxy,3,0.9715457930361828,0.9605940090530396,860,AlpineGP +690_visualizing_galaxy,4,0.9719111294690328,0.9435767220139748,15795,AlpineGP +690_visualizing_galaxy,5,0.9730243298381768,0.9703149463405524,21575,AlpineGP +690_visualizing_galaxy,6,0.9617289298869816,0.96884985018978,5390,AlpineGP +690_visualizing_galaxy,7,0.9666790041592244,0.9700733847808086,11964,AlpineGP +690_visualizing_galaxy,8,0.9707683183934964,0.9625978775015374,6265,AlpineGP +690_visualizing_galaxy,9,0.9721491607808468,0.9662219340918892,23654,AlpineGP +690_visualizing_galaxy,10,0.9700803657416998,0.9680733344575124,11284,AlpineGP +663_rabe_266,1,0.9963318792951192,0.994457165938687,29802,AlpineGP +663_rabe_266,2,0.9960449686025732,0.9951265004153556,22118,AlpineGP +663_rabe_266,3,0.9963055683249298,0.9935596386998714,860,AlpineGP +663_rabe_266,4,0.9957290303285984,0.9973499220193376,15795,AlpineGP +663_rabe_266,5,0.9958296721669572,0.9932032594851476,21575,AlpineGP +663_rabe_266,6,0.9968077976878242,0.9936874782858696,5390,AlpineGP +663_rabe_266,7,0.9957878590679023,0.9951743564833472,11964,AlpineGP +663_rabe_266,8,0.9966844579109548,0.994371086309436,6265,AlpineGP +663_rabe_266,9,0.9963026317788026,0.9961527987423284,23654,AlpineGP +663_rabe_266,10,0.996123180610922,0.996643160687436,11284,AlpineGP +557_analcatdata_apnea1,1,0.8897175339043949,0.9165943930743574,29802,AlpineGP +557_analcatdata_apnea1,2,0.9021908051446248,0.8394505776216514,22118,AlpineGP +557_analcatdata_apnea1,3,0.8979506523943239,0.872037388692701,860,AlpineGP +557_analcatdata_apnea1,4,0.8968447450294665,0.8891633727055719,15795,AlpineGP +557_analcatdata_apnea1,5,0.9135776932586674,0.820866000875306,21575,AlpineGP +557_analcatdata_apnea1,6,0.8859520249350878,0.8400029545273097,5390,AlpineGP +557_analcatdata_apnea1,7,0.9014973550121034,0.9109113022959324,11964,AlpineGP +557_analcatdata_apnea1,8,0.913040248732336,0.7964036194623167,6265,AlpineGP +557_analcatdata_apnea1,9,0.8825839228597379,0.8044546043084179,23654,AlpineGP +557_analcatdata_apnea1,10,0.9153874936470442,0.9104344086531544,11284,AlpineGP +197_cpu_act,1,0.9457665877454394,0.943674907843432,29802,AlpineGP +197_cpu_act,2,0.9421764711644052,0.9358882728244856,22118,AlpineGP +197_cpu_act,3,0.946313954464179,0.947113028610004,860,AlpineGP +197_cpu_act,4,0.8496781432845921,0.8582846660255161,15795,AlpineGP +197_cpu_act,5,0.9517077250675728,0.953044280558118,21575,AlpineGP +197_cpu_act,6,0.9477005153417888,0.9502016383212368,5390,AlpineGP +197_cpu_act,7,0.9486233817092022,0.938278739172296,11964,AlpineGP +197_cpu_act,8,0.9597354596296004,0.9555905049338824,6265,AlpineGP +197_cpu_act,9,0.9648960107234624,0.9634820840877398,23654,AlpineGP +197_cpu_act,10,0.9577340006506452,0.95184836883823,11284,AlpineGP +665_sleuth_case2002,1,0.5492211374383427,0.3194138282629678,29802,AlpineGP +665_sleuth_case2002,2,0.546745989043543,0.2100859216823833,22118,AlpineGP +665_sleuth_case2002,3,0.5762720332288613,0.1394633703813321,860,AlpineGP +665_sleuth_case2002,4,0.5610368593566109,0.1341108952068708,15795,AlpineGP +665_sleuth_case2002,5,0.5492452364910855,0.0683331185031153,21575,AlpineGP +665_sleuth_case2002,6,0.6413743558551446,-0.377276578246055,5390,AlpineGP +665_sleuth_case2002,7,0.5737449858522283,0.2239031799500881,11964,AlpineGP +665_sleuth_case2002,8,0.5274098980367952,0.4262864029398262,6265,AlpineGP +665_sleuth_case2002,9,0.5778163802903606,0.2567767502630014,23654,AlpineGP +665_sleuth_case2002,10,0.5842290081086378,0.2428078580770178,11284,AlpineGP +210_cloud,1,0.9362762610302084,0.8691901588735473,29802,AlpineGP +210_cloud,2,0.932913153743925,0.8880995514125988,22118,AlpineGP +210_cloud,3,0.934292343166372,0.7491585711117132,860,AlpineGP +210_cloud,4,0.9439602033234796,0.8988071127272331,15795,AlpineGP +210_cloud,5,0.95307865726435,0.8442892691784296,21575,AlpineGP +210_cloud,6,0.9451596735432772,0.8376976230836759,5390,AlpineGP +210_cloud,7,0.9474825821384996,0.8673292587973871,11964,AlpineGP +210_cloud,8,0.9364317165249528,0.5617672429308037,6265,AlpineGP +210_cloud,9,0.926513516016614,0.945632888013806,23654,AlpineGP +210_cloud,10,0.9565081594889666,0.3931585863661005,11284,AlpineGP +1089_USCrime,1,0.969037608964617,0.7483367280948074,29802,AlpineGP +1089_USCrime,2,0.968660190689732,0.7314333167731497,22118,AlpineGP +1089_USCrime,3,0.962827999521206,0.496644706237765,860,AlpineGP +1089_USCrime,4,0.9661516464623952,0.7308406230327349,15795,AlpineGP +1089_USCrime,5,0.9279961242910364,0.9028921075202148,21575,AlpineGP +1089_USCrime,6,0.9620435535455484,0.8035834139223468,5390,AlpineGP +1089_USCrime,7,0.9721744001998276,0.4316886473558669,11964,AlpineGP +1089_USCrime,8,0.966795218806302,0.776848816174635,6265,AlpineGP +1089_USCrime,9,0.9679690797919094,0.7503110908067083,23654,AlpineGP +1089_USCrime,10,0.9573742889963756,0.7093714233399555,11284,AlpineGP +230_machine_cpu,1,0.9534204477578466,0.9425716802553034,29802,AlpineGP +230_machine_cpu,2,0.9534857556685918,0.9205756992252286,22118,AlpineGP +230_machine_cpu,3,0.964190935237586,0.8914275532122926,860,AlpineGP +230_machine_cpu,4,0.9570109315062694,0.9189267900502148,15795,AlpineGP +230_machine_cpu,5,0.9376701255761472,0.6273145810017067,21575,AlpineGP +230_machine_cpu,6,0.9598033161097972,-0.7349430092852289,5390,AlpineGP +230_machine_cpu,7,0.9678673237923466,0.6179567298323092,11964,AlpineGP +230_machine_cpu,8,0.9519821921693108,-0.0586719382964624,6265,AlpineGP +230_machine_cpu,9,0.9554845489984238,0.9365405947760388,23654,AlpineGP +230_machine_cpu,10,0.9570806497148374,0.5510813323223882,11284,AlpineGP +228_elusage,1,0.9311419726673622,0.2350684821794946,29802,AlpineGP +228_elusage,2,0.9025813528838752,0.6364760689448354,22118,AlpineGP +228_elusage,3,0.9110242745696652,0.5792736949769591,860,AlpineGP +228_elusage,4,0.9053214518223174,0.7273002285110786,15795,AlpineGP +228_elusage,5,0.8985627851722835,0.8097852281164157,21575,AlpineGP 228_elusage,6,0.8936773561445938,0.7160706477254897,5390,AlpineGP -228_elusage,7,0.8900002753872707,0.8345048286306371,11964,AlpineGP -228_elusage,8,0.9003394088878686,0.7726511311327471,6265,AlpineGP -228_elusage,9,0.9209220925229497,0.7121838299498349,23654,AlpineGP -228_elusage,10,0.9349071079768176,0.236846671035417,11284,AlpineGP +228_elusage,7,0.8997277559837891,0.7979770642133817,11964,AlpineGP +228_elusage,8,0.9030298795272532,0.7540088410298927,6265,AlpineGP +228_elusage,9,0.9150788651356546,0.7846289337518353,23654,AlpineGP +228_elusage,10,0.9403025397412128,0.201718727319101,11284,AlpineGP diff --git a/bench/results/process_results.ipynb b/bench/results/process_results.ipynb index 1c1f5ab..49aaeac 100644 --- a/bench/results/process_results.ipynb +++ b/bench/results/process_results.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 816, + "execution_count": 74, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 817, + "execution_count": 75, "metadata": {}, "outputs": [ { @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 818, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 819, + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -67,17 +67,17 @@ "output_type": "stream", "text": [ " algorithm problem r2_test\n", - "0 AlpineGP 678_visualizing_environmental -0.234804\n", - "1 AlpineGP 678_visualizing_environmental 0.384211\n", - "2 AlpineGP 678_visualizing_environmental 0.200520\n", - "3 AlpineGP 678_visualizing_environmental 0.121942\n", - "4 AlpineGP 678_visualizing_environmental -0.371221\n", + "0 AlpineGP 678_visualizing_environmental -0.269128\n", + "1 AlpineGP 678_visualizing_environmental 0.297113\n", + "2 AlpineGP 678_visualizing_environmental 0.230136\n", + "3 AlpineGP 678_visualizing_environmental 0.034309\n", + "4 AlpineGP 678_visualizing_environmental 0.338141\n", ".. ... ... ...\n", "5 AlpineGP 228_elusage 0.716071\n", - "6 AlpineGP 228_elusage 0.834505\n", - "7 AlpineGP 228_elusage 0.772651\n", - "8 AlpineGP 228_elusage 0.712184\n", - "9 AlpineGP 228_elusage 0.236847\n", + "6 AlpineGP 228_elusage 0.797977\n", + "7 AlpineGP 228_elusage 0.754009\n", + "8 AlpineGP 228_elusage 0.784629\n", + "9 AlpineGP 228_elusage 0.201719\n", "\n", "[380 rows x 3 columns]\n" ] @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 820, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 821, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -108,44 +108,44 @@ "output_type": "stream", "text": [ " dataset r2_train r2_zero_test r2_difference\n", - "0 687_sleuth_ex1605 0.849301 -0.074039 0.923340\n", - "1 542_pollution 0.853563 0.279329 0.574234\n", - "2 192_vineyard 0.863953 0.380180 0.483773\n", - "3 485_analcatdata_vehicle 0.926831 0.470830 0.456001\n", - "4 678_visualizing_environmental 0.544766 0.193514 0.351252\n", - "5 706_sleuth_case1202 0.864212 0.568134 0.296078\n", - "6 665_sleuth_case2002 0.544097 0.257690 0.286407\n", - "7 659_sleuth_ex1714 0.953942 0.702428 0.251514\n", - "8 1089_USCrime 0.951012 0.756442 0.194569\n", - "9 228_elusage 0.895345 0.714127 0.181218\n", - "10 210_cloud 0.935505 0.786611 0.148894\n", - "11 522_pm10 0.370838 0.233109 0.137729\n", - "12 547_no2 0.594315 0.502983 0.091332\n", - "13 1028_SWD 0.425505 0.343532 0.081973\n", - "14 1096_FacultySalaries 0.967721 0.894004 0.073717\n", - "15 230_machine_cpu 0.948970 0.879675 0.069295\n", - "16 695_chatfield_4 0.897843 0.830825 0.067018\n", - "17 229_pwLinear 0.869644 0.811717 0.057926\n", - "18 712_chscase_geyser1 0.799420 0.745605 0.053815\n", - "19 519_vinnie 0.760391 0.719948 0.040443\n", - "20 561_cpu 0.992758 0.967161 0.025596\n", - "21 1029_LEV 0.580859 0.560547 0.020312\n", - "22 556_analcatdata_apnea2 0.885596 0.867148 0.018448\n", - "23 1030_ERA 0.390693 0.373216 0.017476\n", - "24 529_pollen 0.793444 0.782358 0.011086\n", - "25 523_analcatdata_neavote 0.953523 0.943564 0.009959\n", - "26 1027_ESL 0.869154 0.860647 0.008507\n", - "27 666_rmftsa_ladata 0.680417 0.672306 0.008110\n", - "28 503_wind 0.750899 0.745787 0.005112\n", - "29 690_visualizing_galaxy 0.969194 0.964137 0.005057\n", - "30 227_cpu_small 0.947961 0.945094 0.002867\n", - "31 225_puma8NH 0.670164 0.667771 0.002393\n", - "32 663_rabe_266 0.996307 0.995115 0.001192\n", - "33 557_analcatdata_apnea1 0.890163 0.889496 0.000667\n", - "34 505_tecator 0.986597 0.986026 0.000571\n", - "35 527_analcatdata_election2000 0.999136 0.999273 -0.000137\n", - "36 197_cpu_act 0.944913 0.945666 -0.000754\n", - "37 560_bodyfat 0.988908 0.992938 -0.004030\n" + "0 687_sleuth_ex1605 0.859387 0.151750 0.707638\n", + "1 192_vineyard 0.876894 0.213078 0.663815\n", + "2 485_analcatdata_vehicle 0.948263 0.304339 0.643924\n", + "3 542_pollution 0.871968 0.252399 0.619569\n", + "4 678_visualizing_environmental 0.572854 0.111752 0.461102\n", + "5 659_sleuth_ex1714 0.961291 0.610788 0.350504\n", + "6 665_sleuth_case2002 0.567391 0.216995 0.350396\n", + "7 706_sleuth_case1202 0.873983 0.572711 0.301272\n", + "8 1089_USCrime 0.966473 0.739885 0.226588\n", + "9 230_machine_cpu 0.956248 0.759371 0.196877\n", + "10 228_elusage 0.904176 0.721685 0.182490\n", + "11 547_no2 0.604419 0.446190 0.158229\n", + "12 522_pm10 0.382137 0.240674 0.141463\n", + "13 1096_FacultySalaries 0.977285 0.836315 0.140971\n", + "14 1028_SWD 0.437824 0.339576 0.098248\n", + "15 210_cloud 0.940196 0.855809 0.084387\n", + "16 712_chscase_geyser1 0.801973 0.749685 0.052288\n", + "17 519_vinnie 0.761336 0.713402 0.047934\n", + "18 557_analcatdata_apnea1 0.899724 0.856020 0.043704\n", + "19 695_chatfield_4 0.895922 0.854139 0.041783\n", + "20 229_pwLinear 0.898988 0.864017 0.034971\n", + "21 556_analcatdata_apnea2 0.900431 0.867927 0.032504\n", + "22 1029_LEV 0.583067 0.553204 0.029863\n", + "23 666_rmftsa_ladata 0.691882 0.663484 0.028399\n", + "24 560_bodyfat 0.989441 0.964600 0.024841\n", + "25 1027_ESL 0.872930 0.859878 0.013052\n", + "26 503_wind 0.765635 0.752685 0.012950\n", + "27 561_cpu 0.992763 0.981073 0.011690\n", + "28 529_pollen 0.793444 0.782358 0.011086\n", + "29 1030_ERA 0.392293 0.381476 0.010817\n", + "30 523_analcatdata_neavote 0.953523 0.943564 0.009959\n", + "31 527_analcatdata_election2000 0.998078 0.992383 0.005696\n", + "32 690_visualizing_galaxy 0.970424 0.966614 0.003810\n", + "33 663_rabe_266 0.996213 0.994792 0.001421\n", + "34 225_puma8NH 0.676930 0.676840 0.000090\n", + "35 505_tecator 0.987621 0.987823 -0.000202\n", + "36 227_cpu_small 0.952742 0.953020 -0.000278\n", + "37 197_cpu_act 0.948162 0.948657 -0.000495\n" ] } ], @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 822, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -171,44 +171,44 @@ "output_type": "stream", "text": [ " dataset mean median std\n", - "0 527_analcatdata_election2000 0.997727 0.999273 0.003575\n", - "1 663_rabe_266 0.994945 0.995115 0.001346\n", - "2 560_bodyfat 0.988467 0.992938 0.012163\n", - "3 505_tecator 0.986861 0.986026 0.003901\n", - "4 561_cpu 0.957349 0.967161 0.033006\n", - "5 690_visualizing_galaxy 0.963404 0.964137 0.008677\n", - "6 197_cpu_act 0.943090 0.945666 0.009666\n", - "7 227_cpu_small 0.946096 0.945094 0.008128\n", + "0 663_rabe_266 0.994973 0.994792 0.001387\n", + "1 527_analcatdata_election2000 0.992298 0.992383 0.006152\n", + "2 505_tecator 0.985639 0.987823 0.006008\n", + "3 561_cpu 0.976692 0.981073 0.015743\n", + "4 690_visualizing_galaxy 0.963990 0.966614 0.007913\n", + "5 560_bodyfat 0.595445 0.964600 0.678973\n", + "6 227_cpu_small 0.949931 0.953020 0.010269\n", + "7 197_cpu_act 0.939741 0.948657 0.029758\n", "8 523_analcatdata_neavote 0.936577 0.943564 0.027836\n", - "9 1096_FacultySalaries 0.662191 0.894004 0.525012\n", - "10 557_analcatdata_apnea1 0.881416 0.889496 0.039704\n", - "11 230_machine_cpu 0.778943 0.879675 0.273846\n", - "12 556_analcatdata_apnea2 0.863157 0.867148 0.034773\n", - "13 1027_ESL 0.858838 0.860647 0.012759\n", - "14 695_chatfield_4 0.827457 0.830825 0.067719\n", - "15 229_pwLinear 0.810944 0.811717 0.045383\n", - "16 210_cloud 0.761678 0.786611 0.159399\n", - "17 529_pollen 0.787219 0.782358 0.011886\n", - "18 1089_USCrime 0.739218 0.756442 0.117112\n", - "19 503_wind 0.747271 0.745787 0.008830\n", - "20 712_chscase_geyser1 0.751443 0.745605 0.054979\n", - "21 519_vinnie 0.728873 0.719948 0.037725\n", - "22 228_elusage 0.621403 0.714127 0.216677\n", - "23 659_sleuth_ex1714 0.562146 0.702428 0.309503\n", - "24 666_rmftsa_ladata 0.679718 0.672306 0.062048\n", - "25 225_puma8NH 0.668540 0.667771 0.012741\n", - "26 706_sleuth_case1202 0.418764 0.568134 0.437420\n", - "27 1029_LEV 0.557169 0.560547 0.033023\n", - "28 547_no2 0.505620 0.502983 0.092075\n", - "29 485_analcatdata_vehicle 0.244083 0.470830 0.702171\n", - "30 192_vineyard 0.381856 0.380180 0.200867\n", - "31 1030_ERA 0.373955 0.373216 0.045362\n", - "32 1028_SWD 0.335559 0.343532 0.055677\n", - "33 542_pollution 0.170091 0.279329 0.254557\n", - "34 665_sleuth_case2002 0.242165 0.257690 0.146767\n", - "35 522_pm10 0.235107 0.233109 0.044548\n", - "36 678_visualizing_environmental 0.060402 0.193514 0.358373\n", - "37 687_sleuth_ex1605 -0.070725 -0.074039 0.372597\n" + "9 556_analcatdata_apnea2 0.863564 0.867927 0.021875\n", + "10 229_pwLinear 0.856554 0.864017 0.029523\n", + "11 1027_ESL 0.859966 0.859878 0.021908\n", + "12 557_analcatdata_apnea1 0.860032 0.856020 0.045725\n", + "13 210_cloud 0.785513 0.855809 0.174546\n", + "14 695_chatfield_4 0.857361 0.854139 0.032336\n", + "15 1096_FacultySalaries 0.607180 0.836315 0.514190\n", + "16 529_pollen 0.787219 0.782358 0.011886\n", + "17 230_machine_cpu 0.561278 0.759371 0.550194\n", + "18 503_wind 0.755600 0.752685 0.010931\n", + "19 712_chscase_geyser1 0.749426 0.749685 0.052773\n", + "20 1089_USCrime 0.708195 0.739885 0.140384\n", + "21 228_elusage 0.624231 0.721685 0.225679\n", + "22 519_vinnie 0.724621 0.713402 0.036459\n", + "23 225_puma8NH 0.673587 0.676840 0.019615\n", + "24 666_rmftsa_ladata 0.678517 0.663484 0.045206\n", + "25 659_sleuth_ex1714 0.577351 0.610788 0.252262\n", + "26 706_sleuth_case1202 0.435080 0.572711 0.418311\n", + "27 1029_LEV 0.557463 0.553204 0.036105\n", + "28 547_no2 0.473821 0.446190 0.102342\n", + "29 1030_ERA 0.385125 0.381476 0.046993\n", + "30 1028_SWD 0.339529 0.339576 0.039479\n", + "31 485_analcatdata_vehicle 0.162436 0.304339 0.714552\n", + "32 542_pollution -0.510228 0.252399 2.126192\n", + "33 522_pm10 0.242843 0.240674 0.052415\n", + "34 665_sleuth_case2002 0.164390 0.216995 0.215279\n", + "35 192_vineyard 0.204839 0.213078 0.382138\n", + "36 687_sleuth_ex1605 0.007894 0.151750 0.394736\n", + "37 678_visualizing_environmental 0.107825 0.111752 0.199401\n" ] } ], @@ -224,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 823, + "execution_count": 81, "metadata": {}, "outputs": [], "source": [ @@ -234,14 +234,14 @@ }, { "cell_type": "code", - "execution_count": 824, + "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Mean test R2 = 0.6552373025589278\n" + "Mean test R2 = 0.6167499427856007\n" ] } ], @@ -251,16 +251,16 @@ }, { "cell_type": "code", - "execution_count": 825, + "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.3487136233938222" + "0.5219995371567658" ] }, - "execution_count": 825, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } @@ -271,14 +271,14 @@ }, { "cell_type": "code", - "execution_count": 826, + "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Median test R2 = 0.7683394391975586\n" + "Median test R2 = 0.7539631702366081\n" ] } ], @@ -288,53 +288,29 @@ }, { "cell_type": "code", - "execution_count": 828, + "execution_count": 85, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "| dataset | mean | median | std |\n", - "|:------------------------------|-----------:|-----------:|-----------:|\n", - "| 527_analcatdata_election2000 | 0.997727 | 0.999273 | 0.00357541 |\n", - "| 663_rabe_266 | 0.994945 | 0.995115 | 0.00134602 |\n", - "| 560_bodyfat | 0.988467 | 0.992938 | 0.0121634 |\n", - "| 505_tecator | 0.986861 | 0.986026 | 0.0039009 |\n", - "| 561_cpu | 0.957349 | 0.967161 | 0.0330056 |\n", - "| 690_visualizing_galaxy | 0.963404 | 0.964137 | 0.00867664 |\n", - "| 197_cpu_act | 0.94309 | 0.945666 | 0.00966613 |\n", - "| 227_cpu_small | 0.946096 | 0.945094 | 0.00812824 |\n", - "| 523_analcatdata_neavote | 0.936577 | 0.943564 | 0.0278365 |\n", - "| 1096_FacultySalaries | 0.662191 | 0.894004 | 0.525012 |\n", - "| 557_analcatdata_apnea1 | 0.881416 | 0.889496 | 0.0397044 |\n", - "| 230_machine_cpu | 0.778943 | 0.879675 | 0.273846 |\n", - "| 556_analcatdata_apnea2 | 0.863157 | 0.867148 | 0.0347729 |\n", - "| 1027_ESL | 0.858838 | 0.860647 | 0.0127587 |\n", - "| 695_chatfield_4 | 0.827457 | 0.830825 | 0.0677194 |\n", - "| 229_pwLinear | 0.810944 | 0.811717 | 0.0453826 |\n", - "| 210_cloud | 0.761678 | 0.786611 | 0.159399 |\n", - "| 529_pollen | 0.787219 | 0.782358 | 0.0118861 |\n", - "| 1089_USCrime | 0.739218 | 0.756442 | 0.117112 |\n", - "| 503_wind | 0.747271 | 0.745787 | 0.0088297 |\n", - "| 712_chscase_geyser1 | 0.751443 | 0.745605 | 0.0549794 |\n", - "| 519_vinnie | 0.728873 | 0.719948 | 0.0377254 |\n", - "| 228_elusage | 0.621403 | 0.714127 | 0.216677 |\n", - "| 659_sleuth_ex1714 | 0.562146 | 0.702428 | 0.309503 |\n", - "| 666_rmftsa_ladata | 0.679718 | 0.672306 | 0.0620477 |\n", - "| 225_puma8NH | 0.66854 | 0.667771 | 0.0127414 |\n", - "| 706_sleuth_case1202 | 0.418764 | 0.568134 | 0.43742 |\n", - "| 1029_LEV | 0.557169 | 0.560547 | 0.0330229 |\n", - "| 547_no2 | 0.50562 | 0.502983 | 0.0920748 |\n", - "| 485_analcatdata_vehicle | 0.244083 | 0.47083 | 0.702171 |\n", - "| 192_vineyard | 0.381856 | 0.38018 | 0.200867 |\n", - "| 1030_ERA | 0.373955 | 0.373216 | 0.0453621 |\n", - "| 1028_SWD | 0.335559 | 0.343532 | 0.0556771 |\n", - "| 542_pollution | 0.170091 | 0.279329 | 0.254557 |\n", - "| 665_sleuth_case2002 | 0.242165 | 0.25769 | 0.146767 |\n", - "| 522_pm10 | 0.235107 | 0.233109 | 0.0445476 |\n", - "| 678_visualizing_environmental | 0.0604016 | 0.193514 | 0.358373 |\n", - "| 687_sleuth_ex1605 | -0.0707247 | -0.0740387 | 0.372597 |\n" + "ename": "ImportError", + "evalue": "Missing optional dependency 'tabulate'. Use pip or conda to install tabulate.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/compat/_optional.py:135\u001b[39m, in \u001b[36mimport_optional_dependency\u001b[39m\u001b[34m(name, extra, errors, min_version)\u001b[39m\n\u001b[32m 134\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m module = \u001b[43mimportlib\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/importlib/__init__.py:90\u001b[39m, in \u001b[36mimport_module\u001b[39m\u001b[34m(name, package)\u001b[39m\n\u001b[32m 89\u001b[39m level += \u001b[32m1\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m90\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m:1381\u001b[39m, in \u001b[36m_gcd_import\u001b[39m\u001b[34m(name, package, level)\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m:1354\u001b[39m, in \u001b[36m_find_and_load\u001b[39m\u001b[34m(name, import_)\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m:1318\u001b[39m, in \u001b[36m_find_and_load_unlocked\u001b[39m\u001b[34m(name, import_)\u001b[39m\n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'tabulate'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[85]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Convert the DataFrame to Markdown\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m markdown_table = \u001b[43malgorithm_stats\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_markdown\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m# Print the Markdown table\u001b[39;00m\n\u001b[32m 5\u001b[39m \u001b[38;5;28mprint\u001b[39m(markdown_table)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/util/_decorators.py:333\u001b[39m, in \u001b[36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 327\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) > num_allow_args:\n\u001b[32m 328\u001b[39m warnings.warn(\n\u001b[32m 329\u001b[39m msg.format(arguments=_format_argument_list(allow_args)),\n\u001b[32m 330\u001b[39m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[32m 331\u001b[39m stacklevel=find_stack_level(),\n\u001b[32m 332\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m333\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/core/frame.py:2983\u001b[39m, in \u001b[36mDataFrame.to_markdown\u001b[39m\u001b[34m(self, buf, mode, index, storage_options, **kwargs)\u001b[39m\n\u001b[32m 2981\u001b[39m kwargs.setdefault(\u001b[33m\"\u001b[39m\u001b[33mtablefmt\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mpipe\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2982\u001b[39m kwargs.setdefault(\u001b[33m\"\u001b[39m\u001b[33mshowindex\u001b[39m\u001b[33m\"\u001b[39m, index)\n\u001b[32m-> \u001b[39m\u001b[32m2983\u001b[39m tabulate = \u001b[43mimport_optional_dependency\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtabulate\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 2984\u001b[39m result = tabulate.tabulate(\u001b[38;5;28mself\u001b[39m, **kwargs)\n\u001b[32m 2985\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m buf \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/compat/_optional.py:138\u001b[39m, in \u001b[36mimport_optional_dependency\u001b[39m\u001b[34m(name, extra, errors, min_version)\u001b[39m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m errors == \u001b[33m\"\u001b[39m\u001b[33mraise\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m138\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(msg)\n\u001b[32m 139\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 141\u001b[39m \u001b[38;5;66;03m# Handle submodules: if we have submodule, grab parent module from sys.modules\u001b[39;00m\n", + "\u001b[31mImportError\u001b[39m: Missing optional dependency 'tabulate'. Use pip or conda to install tabulate." ] } ], @@ -374,7 +350,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.12.0" } }, "nbformat": 4, From 3c609960fd5b9d48e3d69de78f34cad4ea919442 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 10 Mar 2025 07:53:22 +0100 Subject: [PATCH 11/23] Updated table. --- bench/results/process_results.ipynb | 88 ++++++++++++++++++----------- bench/results/table.md | 80 +++++++++++++------------- 2 files changed, 96 insertions(+), 72 deletions(-) diff --git a/bench/results/process_results.ipynb b/bench/results/process_results.ipynb index 49aaeac..b05a2ff 100644 --- a/bench/results/process_results.ipynb +++ b/bench/results/process_results.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 74, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -224,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -234,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -260,7 +260,7 @@ "0.5219995371567658" ] }, - "execution_count": 83, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -271,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -288,29 +288,53 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 13, "metadata": {}, "outputs": [ { - "ename": "ImportError", - "evalue": "Missing optional dependency 'tabulate'. Use pip or conda to install tabulate.", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/compat/_optional.py:135\u001b[39m, in \u001b[36mimport_optional_dependency\u001b[39m\u001b[34m(name, extra, errors, min_version)\u001b[39m\n\u001b[32m 134\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m135\u001b[39m module = \u001b[43mimportlib\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/importlib/__init__.py:90\u001b[39m, in \u001b[36mimport_module\u001b[39m\u001b[34m(name, package)\u001b[39m\n\u001b[32m 89\u001b[39m level += \u001b[32m1\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m90\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m:1381\u001b[39m, in \u001b[36m_gcd_import\u001b[39m\u001b[34m(name, package, level)\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m:1354\u001b[39m, in \u001b[36m_find_and_load\u001b[39m\u001b[34m(name, import_)\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m:1318\u001b[39m, in \u001b[36m_find_and_load_unlocked\u001b[39m\u001b[34m(name, import_)\u001b[39m\n", - "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'tabulate'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[85]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Convert the DataFrame to Markdown\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m markdown_table = \u001b[43malgorithm_stats\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_markdown\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m# Print the Markdown table\u001b[39;00m\n\u001b[32m 5\u001b[39m \u001b[38;5;28mprint\u001b[39m(markdown_table)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/util/_decorators.py:333\u001b[39m, in \u001b[36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 327\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) > num_allow_args:\n\u001b[32m 328\u001b[39m warnings.warn(\n\u001b[32m 329\u001b[39m msg.format(arguments=_format_argument_list(allow_args)),\n\u001b[32m 330\u001b[39m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[32m 331\u001b[39m stacklevel=find_stack_level(),\n\u001b[32m 332\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m333\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/core/frame.py:2983\u001b[39m, in \u001b[36mDataFrame.to_markdown\u001b[39m\u001b[34m(self, buf, mode, index, storage_options, **kwargs)\u001b[39m\n\u001b[32m 2981\u001b[39m kwargs.setdefault(\u001b[33m\"\u001b[39m\u001b[33mtablefmt\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mpipe\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2982\u001b[39m kwargs.setdefault(\u001b[33m\"\u001b[39m\u001b[33mshowindex\u001b[39m\u001b[33m\"\u001b[39m, index)\n\u001b[32m-> \u001b[39m\u001b[32m2983\u001b[39m tabulate = \u001b[43mimport_optional_dependency\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtabulate\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 2984\u001b[39m result = tabulate.tabulate(\u001b[38;5;28mself\u001b[39m, **kwargs)\n\u001b[32m 2985\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m buf \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/alpine/lib/python3.12/site-packages/pandas/compat/_optional.py:138\u001b[39m, in \u001b[36mimport_optional_dependency\u001b[39m\u001b[34m(name, extra, errors, min_version)\u001b[39m\n\u001b[32m 136\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m errors == \u001b[33m\"\u001b[39m\u001b[33mraise\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m138\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(msg)\n\u001b[32m 139\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 141\u001b[39m \u001b[38;5;66;03m# Handle submodules: if we have submodule, grab parent module from sys.modules\u001b[39;00m\n", - "\u001b[31mImportError\u001b[39m: Missing optional dependency 'tabulate'. Use pip or conda to install tabulate." + "name": "stdout", + "output_type": "stream", + "text": [ + "| dataset | mean | median | std |\n", + "|:------------------------------|------------:|---------:|-----------:|\n", + "| 663_rabe_266 | 0.994973 | 0.994792 | 0.00138667 |\n", + "| 527_analcatdata_election2000 | 0.992298 | 0.992383 | 0.00615176 |\n", + "| 505_tecator | 0.985639 | 0.987823 | 0.00600829 |\n", + "| 561_cpu | 0.976692 | 0.981073 | 0.0157433 |\n", + "| 690_visualizing_galaxy | 0.96399 | 0.966614 | 0.00791278 |\n", + "| 560_bodyfat | 0.595445 | 0.9646 | 0.678973 |\n", + "| 227_cpu_small | 0.949931 | 0.95302 | 0.0102687 |\n", + "| 197_cpu_act | 0.939741 | 0.948657 | 0.0297579 |\n", + "| 523_analcatdata_neavote | 0.936577 | 0.943564 | 0.0278365 |\n", + "| 556_analcatdata_apnea2 | 0.863564 | 0.867927 | 0.0218752 |\n", + "| 229_pwLinear | 0.856554 | 0.864017 | 0.0295234 |\n", + "| 1027_ESL | 0.859966 | 0.859878 | 0.0219077 |\n", + "| 557_analcatdata_apnea1 | 0.860032 | 0.85602 | 0.0457254 |\n", + "| 210_cloud | 0.785513 | 0.855809 | 0.174546 |\n", + "| 695_chatfield_4 | 0.857361 | 0.854139 | 0.0323364 |\n", + "| 1096_FacultySalaries | 0.60718 | 0.836315 | 0.51419 |\n", + "| 529_pollen | 0.787219 | 0.782358 | 0.0118861 |\n", + "| 230_machine_cpu | 0.561278 | 0.759371 | 0.550194 |\n", + "| 503_wind | 0.7556 | 0.752685 | 0.0109313 |\n", + "| 712_chscase_geyser1 | 0.749426 | 0.749685 | 0.0527733 |\n", + "| 1089_USCrime | 0.708195 | 0.739885 | 0.140384 |\n", + "| 228_elusage | 0.624231 | 0.721685 | 0.225679 |\n", + "| 519_vinnie | 0.724621 | 0.713402 | 0.0364589 |\n", + "| 225_puma8NH | 0.673587 | 0.67684 | 0.0196146 |\n", + "| 666_rmftsa_ladata | 0.678517 | 0.663484 | 0.0452063 |\n", + "| 659_sleuth_ex1714 | 0.577351 | 0.610788 | 0.252262 |\n", + "| 706_sleuth_case1202 | 0.43508 | 0.572711 | 0.418311 |\n", + "| 1029_LEV | 0.557463 | 0.553204 | 0.0361047 |\n", + "| 547_no2 | 0.473821 | 0.44619 | 0.102342 |\n", + "| 1030_ERA | 0.385125 | 0.381476 | 0.0469926 |\n", + "| 1028_SWD | 0.339529 | 0.339576 | 0.0394794 |\n", + "| 485_analcatdata_vehicle | 0.162436 | 0.304339 | 0.714552 |\n", + "| 542_pollution | -0.510228 | 0.252399 | 2.12619 |\n", + "| 522_pm10 | 0.242843 | 0.240674 | 0.0524153 |\n", + "| 665_sleuth_case2002 | 0.16439 | 0.216995 | 0.215279 |\n", + "| 192_vineyard | 0.204839 | 0.213078 | 0.382138 |\n", + "| 687_sleuth_ex1605 | 0.00789428 | 0.15175 | 0.394736 |\n", + "| 678_visualizing_environmental | 0.107825 | 0.111752 | 0.199401 |\n" ] } ], diff --git a/bench/results/table.md b/bench/results/table.md index 1cb4766..b2f28b8 100644 --- a/bench/results/table.md +++ b/bench/results/table.md @@ -1,40 +1,40 @@ -| dataset | mean | median | std | -|:------------------------------|-----------:|-----------:|-----------:| -| 527_analcatdata_election2000 | 0.997727 | 0.999273 | 0.00357541 | -| 663_rabe_266 | 0.994945 | 0.995115 | 0.00134602 | -| 560_bodyfat | 0.988467 | 0.992938 | 0.0121634 | -| 505_tecator | 0.986861 | 0.986026 | 0.0039009 | -| 561_cpu | 0.957349 | 0.967161 | 0.0330056 | -| 690_visualizing_galaxy | 0.963404 | 0.964137 | 0.00867664 | -| 197_cpu_act | 0.94309 | 0.945666 | 0.00966613 | -| 227_cpu_small | 0.946096 | 0.945094 | 0.00812824 | -| 523_analcatdata_neavote | 0.936577 | 0.943564 | 0.0278365 | -| 1096_FacultySalaries | 0.662191 | 0.894004 | 0.525012 | -| 557_analcatdata_apnea1 | 0.881416 | 0.889496 | 0.0397044 | -| 230_machine_cpu | 0.778943 | 0.879675 | 0.273846 | -| 556_analcatdata_apnea2 | 0.863157 | 0.867148 | 0.0347729 | -| 1027_ESL | 0.858838 | 0.860647 | 0.0127587 | -| 695_chatfield_4 | 0.827457 | 0.830825 | 0.0677194 | -| 229_pwLinear | 0.810944 | 0.811717 | 0.0453826 | -| 210_cloud | 0.761678 | 0.786611 | 0.159399 | -| 529_pollen | 0.787219 | 0.782358 | 0.0118861 | -| 1089_USCrime | 0.739218 | 0.756442 | 0.117112 | -| 503_wind | 0.747271 | 0.745787 | 0.0088297 | -| 712_chscase_geyser1 | 0.751443 | 0.745605 | 0.0549794 | -| 519_vinnie | 0.728873 | 0.719948 | 0.0377254 | -| 228_elusage | 0.621403 | 0.714127 | 0.216677 | -| 659_sleuth_ex1714 | 0.562146 | 0.702428 | 0.309503 | -| 666_rmftsa_ladata | 0.679718 | 0.672306 | 0.0620477 | -| 225_puma8NH | 0.66854 | 0.667771 | 0.0127414 | -| 706_sleuth_case1202 | 0.418764 | 0.568134 | 0.43742 | -| 1029_LEV | 0.557169 | 0.560547 | 0.0330229 | -| 547_no2 | 0.50562 | 0.502983 | 0.0920748 | -| 485_analcatdata_vehicle | 0.244083 | 0.47083 | 0.702171 | -| 192_vineyard | 0.381856 | 0.38018 | 0.200867 | -| 1030_ERA | 0.373955 | 0.373216 | 0.0453621 | -| 1028_SWD | 0.335559 | 0.343532 | 0.0556771 | -| 542_pollution | 0.170091 | 0.279329 | 0.254557 | -| 665_sleuth_case2002 | 0.242165 | 0.25769 | 0.146767 | -| 522_pm10 | 0.235107 | 0.233109 | 0.0445476 | -| 678_visualizing_environmental | 0.0604016 | 0.193514 | 0.358373 | -| 687_sleuth_ex1605 | -0.0707247 | -0.0740387 | 0.372597 | \ No newline at end of file +| dataset | mean | median | std | +|:------------------------------|------------:|---------:|-----------:| +| 663_rabe_266 | 0.994973 | 0.994792 | 0.00138667 | +| 527_analcatdata_election2000 | 0.992298 | 0.992383 | 0.00615176 | +| 505_tecator | 0.985639 | 0.987823 | 0.00600829 | +| 561_cpu | 0.976692 | 0.981073 | 0.0157433 | +| 690_visualizing_galaxy | 0.96399 | 0.966614 | 0.00791278 | +| 560_bodyfat | 0.595445 | 0.9646 | 0.678973 | +| 227_cpu_small | 0.949931 | 0.95302 | 0.0102687 | +| 197_cpu_act | 0.939741 | 0.948657 | 0.0297579 | +| 523_analcatdata_neavote | 0.936577 | 0.943564 | 0.0278365 | +| 556_analcatdata_apnea2 | 0.863564 | 0.867927 | 0.0218752 | +| 229_pwLinear | 0.856554 | 0.864017 | 0.0295234 | +| 1027_ESL | 0.859966 | 0.859878 | 0.0219077 | +| 557_analcatdata_apnea1 | 0.860032 | 0.85602 | 0.0457254 | +| 210_cloud | 0.785513 | 0.855809 | 0.174546 | +| 695_chatfield_4 | 0.857361 | 0.854139 | 0.0323364 | +| 1096_FacultySalaries | 0.60718 | 0.836315 | 0.51419 | +| 529_pollen | 0.787219 | 0.782358 | 0.0118861 | +| 230_machine_cpu | 0.561278 | 0.759371 | 0.550194 | +| 503_wind | 0.7556 | 0.752685 | 0.0109313 | +| 712_chscase_geyser1 | 0.749426 | 0.749685 | 0.0527733 | +| 1089_USCrime | 0.708195 | 0.739885 | 0.140384 | +| 228_elusage | 0.624231 | 0.721685 | 0.225679 | +| 519_vinnie | 0.724621 | 0.713402 | 0.0364589 | +| 225_puma8NH | 0.673587 | 0.67684 | 0.0196146 | +| 666_rmftsa_ladata | 0.678517 | 0.663484 | 0.0452063 | +| 659_sleuth_ex1714 | 0.577351 | 0.610788 | 0.252262 | +| 706_sleuth_case1202 | 0.43508 | 0.572711 | 0.418311 | +| 1029_LEV | 0.557463 | 0.553204 | 0.0361047 | +| 547_no2 | 0.473821 | 0.44619 | 0.102342 | +| 1030_ERA | 0.385125 | 0.381476 | 0.0469926 | +| 1028_SWD | 0.339529 | 0.339576 | 0.0394794 | +| 485_analcatdata_vehicle | 0.162436 | 0.304339 | 0.714552 | +| 542_pollution | -0.510228 | 0.252399 | 2.12619 | +| 522_pm10 | 0.242843 | 0.240674 | 0.0524153 | +| 665_sleuth_case2002 | 0.16439 | 0.216995 | 0.215279 | +| 192_vineyard | 0.204839 | 0.213078 | 0.382138 | +| 687_sleuth_ex1605 | 0.00789428 | 0.15175 | 0.394736 | +| 678_visualizing_environmental | 0.107825 | 0.111752 | 0.199401 | \ No newline at end of file From 5e332cf61b355c75d9a1d4a139dfb5b0da0ee006 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 10 Mar 2025 16:39:33 +0100 Subject: [PATCH 12/23] Working on making the regressor compliant with sklearn specs. --- src/alpine/gp/regressor.py | 31 +++++++++----- tests/test_regressor.py | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 11 deletions(-) create mode 100644 tests/test_regressor.py diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 2442b82..23616f6 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -104,7 +104,7 @@ def __init__( output_path: str | None = None, batch_size=1, ): - + super().__init__() self.pset = pset self.fitness = fitness @@ -122,16 +122,14 @@ def __init__( self.num_best_inds_str = num_best_inds_str self.plot_freq = plot_freq self.preprocess_func = preprocess_func - self.callback_fun = callback_func + self.callback_func = callback_func self.is_plot_best_individual_tree = plot_best_individual_tree self.is_save_best_individual = save_best_individual self.is_save_train_fit_history = save_train_fit_history self.output_path = output_path self.batch_size = batch_size - if common_data is not None: - # FIXME: does everything work when the functions do not have common args? - self.__store_fit_error_common_args(common_data) + self.common_data = common_data self.NINDIVIDUALS = NINDIVIDUALS self.NGEN = NGEN @@ -157,8 +155,14 @@ def __init__( self.overlapping_generation = overlapping_generation self.validate = validate + self.frac_elitist = frac_elitist + # Elitism settings - self.n_elitist = int(frac_elitist * self.NINDIVIDUALS) + self.n_elitist = int(self.frac_elitist * self.NINDIVIDUALS) + + if self.common_data is not None: + # FIXME: does everything work when the functions do not have common args? + self.__store_fit_error_common_args(self.common_data) # config individual creator and toolbox self.__creator_toolbox_config() @@ -196,6 +200,9 @@ def __init__( self.plot_initialized = False self.fig_id = 0 + def get_params(self, deep=True): + return self.__dict__ + def __creator_toolbox_config(self): """Initialize toolbox and individual creator based on config file.""" self.toolbox = base.Toolbox() @@ -276,7 +283,8 @@ def __store_datasets(self, datasets: Dict[str, Dataset]): def __store_shared_objects(self, label: str, data: Dict): for key, value in data.items(): # replace each item of the dataset with its obj ref - data[key] = ray.put(value) + if not isinstance(value, ray.ObjectRef): + data[key] = ray.put(value) self.data_store[label] = data def __init_logbook(self): @@ -425,6 +433,7 @@ def fit(self, X_train, y_train=None, X_val=None, y_val=None): if self.validate and self.error_metric is not None: self.__register_val_funcs() self.__run() + return self def predict(self, X_test): test_data = {"X": X_test} @@ -567,8 +576,8 @@ def __evolve_islands(self, cgen: int): fitnesses = self.__unflatten_list(fitnesses, [len(i) for i in invalid_inds]) for i in range(self.num_islands): - if self.callback_fun is not None: - self.callback_fun(invalid_inds[i], fitnesses[i]) + if self.callback_func is not None: + self.callback_func(invalid_inds[i], fitnesses[i]) else: for ind, fit in zip(invalid_inds[i], fitnesses[i]): ind.fitness.values = fit @@ -626,8 +635,8 @@ def __run(self): for i in range(self.num_islands): fitnesses = self.toolbox.map(self.toolbox.evaluate_train, self.pop[i]) - if self.callback_fun is not None: - self.callback_fun(self.pop[i], fitnesses) + if self.callback_func is not None: + self.callback_func(self.pop[i], fitnesses) else: for ind, fit in zip(self.pop[i], fitnesses): ind.fitness.values = fit diff --git a/tests/test_regressor.py b/tests/test_regressor.py new file mode 100644 index 0000000..737ff06 --- /dev/null +++ b/tests/test_regressor.py @@ -0,0 +1,83 @@ +from sklearn.utils.estimator_checks import check_estimator +from alpine.gp.regressor import GPSymbolicRegressor +from alpine.gp import util +from deap import gp +from sklearn.datasets import make_regression +from sklearn.model_selection import train_test_split, GridSearchCV + + +def test_regressor(): + pset = gp.PrimitiveSetTyped( + "MAIN", + [ + float, + ], + float, + ) + pset.renameArguments(ARG0="x") + + primitives = { + "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, + "used": [ + {"name": "add", "dimension": None, "rank": None}, + {"name": "sub", "dimension": None, "rank": None}, + {"name": "mul", "dimension": None, "rank": None}, + {"name": "div", "dimension": None, "rank": None}, + {"name": "sin", "dimension": None, "rank": None}, + {"name": "cos", "dimension": None, "rank": None}, + {"name": "exp", "dimension": None, "rank": None}, + {"name": "log", "dimension": None, "rank": None}, + ], + } + + pset = util.add_primitives_to_pset_from_dict(pset, primitives) + + penalty = {"reg_param": 0.0} + common_data = {"penalty": penalty} + + gpsr = GPSymbolicRegressor( + pset=pset, + fitness=None, + error_metric=None, + predict_func=None, + common_data=common_data, + NINDIVIDUALS=100, + num_islands=10, + NGEN=200, + MUTPB=0.1, + min_height=2, + max_height=6, + crossover_prob=0.9, + overlapping_generation=True, + print_log=True, + batch_size=100, + ) + + print(gpsr.get_params()) + check_estimator(gpsr) + + # # Generate synthetic data + # X, y = make_regression(n_samples=100, n_features=10, random_state=42) + # X_train, X_test, y_train, y_test = train_test_split( + # X, y, test_size=0.2, random_state=42 + # ) + + # # Parameter grid + # param_grid = {"NGEN": [10, 20]} + + # # Grid search + # grid_search = GridSearchCV( + # estimator=gpsr, + # param_grid=param_grid, + # cv=3, + # scoring="r2", + # verbose=1, + # n_jobs=1, + # ) + + # # Fit the grid search + # grid_search.fit(X_train, y_train) + + +if __name__ == "__main__": + test_regressor() From 0d9b1c3667e2022362acb366f507ab11cb811748 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Tue, 11 Mar 2025 08:54:08 +0100 Subject: [PATCH 13/23] Working on check regressor. Cannot pickle fitness and score functions when passed as parameters. --- src/alpine/gp/regressor.py | 118 +++++++++++++++++++------------------ src/alpine/gp/util.py | 16 +++++ tests/test_poisson1d.py | 2 +- tests/test_regressor.py | 8 +-- 4 files changed, 82 insertions(+), 62 deletions(-) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 23616f6..98081fb 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -111,8 +111,6 @@ def __init__( self.error_metric = error_metric self.predict_func = predict_func - self.data_store = dict() - self.plot_best = plot_best self.plot_best_genealogy = plot_best_genealogy @@ -123,9 +121,9 @@ def __init__( self.plot_freq = plot_freq self.preprocess_func = preprocess_func self.callback_func = callback_func - self.is_plot_best_individual_tree = plot_best_individual_tree - self.is_save_best_individual = save_best_individual - self.is_save_train_fit_history = save_train_fit_history + self.plot_best_individual_tree = plot_best_individual_tree + self.save_best_individual = save_best_individual + self.save_train_fit_history = save_train_fit_history self.output_path = output_path self.batch_size = batch_size @@ -157,48 +155,14 @@ def __init__( self.frac_elitist = frac_elitist - # Elitism settings - self.n_elitist = int(self.frac_elitist * self.NINDIVIDUALS) - - if self.common_data is not None: - # FIXME: does everything work when the functions do not have common args? - self.__store_fit_error_common_args(self.common_data) - - # config individual creator and toolbox - self.__creator_toolbox_config() - self.seed = seed if self.seed is not None: self.seed = [self.createIndividual.from_string(i, pset) for i in seed] - # Initialize variables for statistics - self.stats_fit = tools.Statistics(lambda ind: ind.fitness.values) - self.stats_size = tools.Statistics(len) - self.mstats = tools.MultiStatistics( - fitness=self.stats_fit, size=self.stats_size - ) - self.mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) - self.mstats.register("std", lambda ind: np.around(np.std(ind), 4)) - self.mstats.register("min", lambda ind: np.around(np.min(ind), 4)) - self.mstats.register("max", lambda ind: np.around(np.max(ind), 4)) - - self.__init_logbook() - - self.train_fit_history = [] - - # Create history object to build the genealogy tree - self.history = tools.History() - - if self.plot_best_genealogy: - # Decorators for history - self.toolbox.decorate("mate", self.history.decorator) - self.toolbox.decorate("mutate", self.history.decorator) - - self.__register_map() - - self.plot_initialized = False - self.fig_id = 0 + @property + def n_elitist(self): + return int(self.frac_elitist * self.NINDIVIDUALS) def get_params(self, deep=True): return self.__dict__ @@ -420,9 +384,48 @@ def mapper(f, individuals, toolbox_ref): toolbox_ref = ray.put(self.toolbox) self.toolbox.register("map", mapper, toolbox_ref=toolbox_ref) - def fit(self, X_train, y_train=None, X_val=None, y_val=None): + def fit(self, X, y=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" - train_data = {"X": X_train, "y": y_train} + + if not hasattr(self, "_is_fitted"): + self.data_store = dict() + + if self.common_data is not None: + # FIXME: does everything work when the functions do not have common args? + self.__store_fit_error_common_args(self.common_data) + + # config individual creator and toolbox + self.__creator_toolbox_config() + + # Initialize variables for statistics + self.stats_fit = tools.Statistics(lambda ind: ind.fitness.values) + self.stats_size = tools.Statistics(len) + self.mstats = tools.MultiStatistics( + fitness=self.stats_fit, size=self.stats_size + ) + self.mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) + self.mstats.register("std", lambda ind: np.around(np.std(ind), 4)) + self.mstats.register("min", lambda ind: np.around(np.min(ind), 4)) + self.mstats.register("max", lambda ind: np.around(np.max(ind), 4)) + + self.__init_logbook() + + self.train_fit_history = [] + + # Create history object to build the genealogy tree + self.history = tools.History() + + if self.plot_best_genealogy: + # Decorators for history + self.toolbox.decorate("mate", self.history.decorator) + self.toolbox.decorate("mutate", self.history.decorator) + + self.__register_map() + + self.plot_initialized = False + self.fig_id = 0 + + train_data = {"X": X, "y": y} if self.validate and X_val is not None: val_data = {"X": X_val, "y": y_val} datasets = {"train": train_data, "val": val_data} @@ -433,6 +436,7 @@ def fit(self, X_train, y_train=None, X_val=None, y_val=None): if self.validate and self.error_metric is not None: self.__register_val_funcs() self.__run() + self._is_fitted = True return self def predict(self, X_test): @@ -443,18 +447,18 @@ def predict(self, X_test): u_best = self.toolbox.map(self.toolbox.evaluate_test_sols, (self.best,))[0] return u_best - def score(self, X_test, y_test): + def score(self, X, y): """Computes the error metric (passed to the `GPSymbolicRegressor` constructor) on a given dataset. """ - test_data = {"X": X_test, "y": y_test} + test_data = {"X": X, "y": y} datasets = {"test": test_data} self.__store_datasets(datasets) self.__register_score_func() score = self.toolbox.map(self.toolbox.evaluate_test_score, (self.best,))[0] return score - def immigration(self, pop, num_immigrants: int): + def __immigration(self, pop, num_immigrants: int): immigrants = self.toolbox.population(n=num_immigrants) for i in range(num_immigrants): idx_individual_to_replace = random.randint(0, self.NINDIVIDUALS - 1) @@ -543,7 +547,7 @@ def __evolve_islands(self, cgen: int): for i in range(self.num_islands): if self.immigration_enabled: if cgen % self.immigration_freq == 0: - self.immigration( + self.__immigration( self.pop[i], int(self.immigration_frac * self.NINDIVIDUALS) ) @@ -709,20 +713,20 @@ def __run(self): if self.plot_best_genealogy: self.__plot_genealogy(self.best) - if self.is_plot_best_individual_tree: - self.plot_best_individual_tree() + if self.plot_best_individual_tree: + self.__plot_best_individual_tree() - if self.is_save_best_individual and self.output_path is not None: - self.save_best_individual(self.output_path) + if self.save_best_individual and self.output_path is not None: + self.__save_best_individual(self.output_path) print("String of the best individual saved to disk.") - if self.is_save_train_fit_history and self.output_path is not None: - self.save_train_fit_history(self.output_path) + if self.save_train_fit_history and self.output_path is not None: + self.__save_train_fit_history(self.output_path) print("Training fitness history saved to disk.") # NOTE: ray.shutdown should be manually called by the user - def plot_best_individual_tree(self): + def __plot_best_individual_tree(self): """Plots the tree of the best individual at the end of the evolution.""" nodes, edges, labels = gp.graph(self.best) graph = nx.Graph() @@ -736,13 +740,13 @@ def plot_best_individual_tree(self): plt.axis("off") plt.show() - def save_best_individual(self, output_path: str): + def __save_best_individual(self, output_path: str): """Saves the string of the best individual of the population in a .txt file.""" file = open(join(output_path, "best_ind.txt"), "w") file.write(str(self.best)) file.close() - def save_train_fit_history(self, output_path: str): + def __save_train_fit_history(self, output_path: str): np.save(join(output_path, "train_fit_history.npy"), self.train_fit_history) if self.validate: np.save(join(output_path, "val_fit_history.npy"), self.val_fit_history) diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index a468040..b59f3b1 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -1,6 +1,7 @@ import yaml from .primitives import add_primitives_to_pset from importlib import import_module +import ray def add_primitives_to_pset_from_dict(pset, primitives_dict): @@ -91,3 +92,18 @@ def detect_nested_trigonometric_functions(equation): i += 1 return nested + + +@ray.remote +def dummy_fitness(individuals_str, toolbox, X, y): + fitnesses = [(0.0,)] * len(individuals_str) + + return fitnesses + + +@ray.remote +def dummy_score(individuals_str, toolbox, X, y): + + MSE = [0.0] * len(individuals_str) + + return MSE diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 96da59a..433efe8 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -228,7 +228,7 @@ def test_poisson1d(set_test_dir, yamlfile): fit_score = gpsr.score(X_train, y_train) - gpsr.save_best_test_sols(X_train, "./") + gpsr.__save_best_test_sols(X_train, "./") ray.shutdown() assert np.allclose(u.coeffs.flatten(), np.ravel(u_best)) diff --git a/tests/test_regressor.py b/tests/test_regressor.py index 737ff06..98f4cdd 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -4,6 +4,7 @@ from deap import gp from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split, GridSearchCV +from alpine.gp.util import dummy_fitness, dummy_score def test_regressor(): @@ -32,13 +33,12 @@ def test_regressor(): pset = util.add_primitives_to_pset_from_dict(pset, primitives) - penalty = {"reg_param": 0.0} - common_data = {"penalty": penalty} + common_data = {} gpsr = GPSymbolicRegressor( pset=pset, - fitness=None, - error_metric=None, + fitness=dummy_fitness.remote, + error_metric=dummy_score.remote, predict_func=None, common_data=common_data, NINDIVIDUALS=100, From 86b7a8bf37f34cae08066b101a5426d3b80e6231 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 17 Mar 2025 13:48:36 +0100 Subject: [PATCH 14/23] [WIP] Making interface sklearn compatible. Fixed private attributes. --- bench/bench.py | 6 +- src/alpine/gp/regressor.py | 279 +++++++++++++++++++++---------------- src/alpine/gp/util.py | 8 +- tests/test_regressor.py | 8 +- 4 files changed, 169 insertions(+), 132 deletions(-) diff --git a/bench/bench.py b/bench/bench.py index 2fc23b5..7432b1d 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -266,12 +266,12 @@ def eval(problem, cfgfile, seed=42): gpsr.fit(X_train, y_train_scaled) toc = time.time() - if hasattr(gpsr.best, "consts"): - print("Best parameters = ", gpsr.best.consts) + if hasattr(gpsr.__best, "consts"): + print("Best parameters = ", gpsr.__best.consts) print("Elapsed time = ", toc - tic) individuals_per_sec = ( - (gpsr.cgen + 1) * gpsr.NINDIVIDUALS * gpsr.num_islands / (toc - tic) + (gpsr.__cgen + 1) * gpsr.NINDIVIDUALS * gpsr.num_islands / (toc - tic) ) print("Individuals per sec = ", individuals_per_sec) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 98081fb..9f460c1 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -11,7 +11,13 @@ import ray import random from itertools import chain -from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.base import BaseEstimator, RegressorMixin, _fit_context +from sklearn.utils.validation import ( + check_is_fitted, + validate_data, + check_array, + check_X_y, +) # reducing the number of threads launched by fitness evaluations os.environ["MKL_NUM_THREADS"] = "1" @@ -66,7 +72,7 @@ def __init__( select_fun: str = "tools.selection.tournament_with_elitism", select_args: str = "{'num_elitist': self.n_elitist, 'tournsize': 3, 'stochastic_tourn': { 'enabled': False, 'prob': [0.8, 0.2] }}", # noqa: E501 mut_fun: str = "gp.mutUniform", - mut_args: str = "{'expr': self.toolbox.expr_mut, 'pset': self.pset}", + mut_args: str = "{'expr': self._GPSymbolicRegressor__toolbox.expr_mut, 'pset': self.pset}", expr_mut_fun: str = "gp.genHalfAndHalf", expr_mut_args: str = "{'min_': 1, 'max_': 3}", crossover_fun: str = "gp.cxOnePoint", @@ -157,9 +163,6 @@ def __init__( self.seed = seed - if self.seed is not None: - self.seed = [self.createIndividual.from_string(i, pset) for i in seed] - @property def n_elitist(self): return int(self.frac_elitist * self.NINDIVIDUALS) @@ -169,38 +172,40 @@ def get_params(self, deep=True): def __creator_toolbox_config(self): """Initialize toolbox and individual creator based on config file.""" - self.toolbox = base.Toolbox() + self.__toolbox = base.Toolbox() # SELECTION - self.toolbox.register("select", eval(self.select_fun), **eval(self.select_args)) + self.__toolbox.register( + "select", eval(self.select_fun), **eval(self.select_args) + ) # MUTATION - self.toolbox.register( + self.__toolbox.register( "expr_mut", eval(self.expr_mut_fun), **eval(self.expr_mut_args) ) - self.toolbox.register("mutate", eval(self.mut_fun), **eval(self.mut_args)) + self.__toolbox.register("mutate", eval(self.mut_fun), **eval(self.mut_args)) # CROSSOVER - self.toolbox.register( + self.__toolbox.register( "mate", eval(self.crossover_fun), **eval(self.crossover_args) ) - self.toolbox.decorate( + self.__toolbox.decorate( "mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17) ) - self.toolbox.decorate( + self.__toolbox.decorate( "mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17) ) # INDIVIDUAL GENERATOR/CREATOR - self.toolbox.register( + self.__toolbox.register( "expr", gp.genHalfAndHalf, pset=self.pset, min_=self.min_height, max_=self.max_height, ) - self.toolbox.register( + self.__toolbox.register( "expr_pop", gp.genHalfAndHalf, pset=self.pset, @@ -211,16 +216,21 @@ def __creator_toolbox_config(self): creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) createIndividual = creator.Individual - self.toolbox.register( - "individual", tools.initIterate, createIndividual, self.toolbox.expr + self.__toolbox.register( + "individual", tools.initIterate, createIndividual, self.__toolbox.expr ) - self.toolbox.register( - "population", tools.initRepeat, list, self.toolbox.individual + self.__toolbox.register( + "population", tools.initRepeat, list, self.__toolbox.individual ) - self.toolbox.register("compile", gp.compile, pset=self.pset) + self.__toolbox.register("compile", gp.compile, pset=self.pset) - self.createIndividual = createIndividual + self.__createIndividual = createIndividual + + if self.seed is not None: + self.seed = [ + self.__createIndividual.from_string(i, self.pset) for i in self.seed + ] def __store_fit_error_common_args(self, data: Dict): """Store names and values of the arguments that are in common between @@ -249,26 +259,26 @@ def __store_shared_objects(self, label: str, data: Dict): # replace each item of the dataset with its obj ref if not isinstance(value, ray.ObjectRef): data[key] = ray.put(value) - self.data_store[label] = data + self.__data_store[label] = data def __init_logbook(self): # Initialize logbook to collect statistics - self.logbook = tools.Logbook() + self.__logbook = tools.Logbook() # Headers of fields to be printed during log if self.validate: - self.logbook.header = "gen", "evals", "fitness", "size", "valid" - self.logbook.chapters["valid"].header = "valid_fit", "valid_err" + self.__logbook.header = "gen", "evals", "fitness", "size", "valid" + self.__logbook.chapters["valid"].header = "valid_fit", "valid_err" else: - self.logbook.header = "gen", "evals", "fitness", "size" - self.logbook.chapters["fitness"].header = "min", "avg", "max", "std" - self.logbook.chapters["size"].header = "min", "avg", "max", "std" + self.__logbook.header = "gen", "evals", "fitness", "size" + self.__logbook.chapters["fitness"].header = "min", "avg", "max", "std" + self.__logbook.chapters["size"].header = "min", "avg", "max", "std" def __compute_valid_stats(self, pop): best = tools.selBest(pop, k=1) # FIXME: ugly way of handling lists/tuples; assume eval_val_MSE returns a # single-valued tuple as eval_val_fit - valid_fit = self.toolbox.map(self.toolbox.evaluate_val_fit, best)[0][0] - valid_err = self.toolbox.map(self.toolbox.evaluate_val_MSE, best)[0] + valid_fit = self.__toolbox.map(self.__toolbox.evaluate_val_fit, best)[0][0] + valid_err = self.__toolbox.map(self.__toolbox.evaluate_val_MSE, best)[0] return valid_fit, valid_err @@ -278,7 +288,7 @@ def __stats(self, pop, gen, evals): # LINE_UP = '\033[1A' # LINE_CLEAR = '\x1b[2K' # Compile statistics for the current population - record = self.mstats.compile(pop) + record = self.__mstats.compile(pop) # record the statistics in the logbook if self.validate: @@ -286,28 +296,28 @@ def __stats(self, pop, gen, evals): valid_fit, valid_err = self.__compute_valid_stats(pop) record["valid"] = {"valid_fit": valid_fit, "valid_err": valid_err} - self.logbook.record(gen=gen, evals=evals, **record) + self.__logbook.record(gen=gen, evals=evals, **record) if self.print_log: # Print statistics for the current population # print(LINE_UP, end=LINE_CLEAR, flush=True) - print(self.logbook.stream, flush=True) + print(self.__logbook.stream, flush=True) def __plot_history(self): """Plots the fitness of the best individual vs generation number.""" - if not self.plot_initialized: - self.plot_initialized = True + if not self.__plot_initialized: + self.__plot_initialized = True # new figure number when starting with new evolution - self.fig_id = self.fig_id + 1 - plt.figure(self.fig_id).show() + self.__fig_id = self.__fig_id + 1 + plt.figure(self.__fig_id).show() plt.pause(0.01) - plt.figure(self.fig_id) + plt.figure(self.__fig_id) fig = plt.gcf() # Array of generations starts from 1 - x = range(1, len(self.train_fit_history) + 1) - plt.plot(x, self.train_fit_history, "b", label="Training Fitness") + x = range(1, len(self.__train_fit_history) + 1) + plt.plot(x, self.__train_fit_history, "b", label="Training Fitness") if self.validate: plt.plot(x, self.val_fit_history, "r", label="Validation Fitness") fig.legend(loc="upper right") @@ -323,7 +333,7 @@ def __plot_genealogy(self, best): # Get genealogy of best individual import networkx - gen_best = self.history.getGenealogy(best) + gen_best = self.__history.getGenealogy(best) graph = networkx.DiGraph(gen_best) graph = graph.reverse() pos = networkx.nx_agraph.graphviz_layout( @@ -332,7 +342,7 @@ def __plot_genealogy(self, best): # Retrieve individual strings for graph node labels labels = gen_best.copy() for key in labels.keys(): - labels[key] = str(self.history.genealogy_history[key]) + labels[key] = str(self.__history.genealogy_history[key]) plt.figure() networkx.draw_networkx(graph, pos=pos) label_options = {"ec": "k", "fc": "lightblue", "alpha": 1.0} @@ -343,33 +353,46 @@ def __plot_genealogy(self, best): # Save genealogy to file # networkx.nx_agraph.write_dot(graph, "genealogy.dot") + def __get_remote(self, f): + return (ray.remote(f)).remote + def __register_fitness_func(self): - store = self.data_store + store = self.__data_store args_train = store["common"] | store["train"] - self.toolbox.register("evaluate_train", self.fitness, **args_train) + self.__toolbox.register( + "evaluate_train", self.__get_remote(self.fitness), **args_train + ) def __register_val_funcs(self): """Register the functions needed for validation, i.e. the error metric and the fitness function. Must be called after storing the datasets in the common obj space. """ - store = self.data_store + store = self.__data_store args_val = store["common"] | store["val"] - self.toolbox.register("evaluate_val_fit", self.fitness, **args_val) - self.toolbox.register("evaluate_val_MSE", self.error_metric, **args_val) + self.__toolbox.register( + "evaluate_val_fit", self.__get_remote(self.fitness), **args_val + ) + self.__toolbox.register( + "evaluate_val_MSE", self.__get_remote(self.error_metric), **args_val + ) def __register_score_func(self): - store = self.data_store + store = self.__data_store args_score_func = store["common"] | store["test"] - self.toolbox.register( - "evaluate_test_score", self.error_metric, **args_score_func + self.__toolbox.register( + "evaluate_test_score", + self.__get_remote(self.error_metric), + **args_score_func, ) def __register_predict_func(self): - store = self.data_store + store = self.__data_store args_predict_func = store["common"] | store["test"] - self.toolbox.register( - "evaluate_test_sols", self.predict_func, **args_predict_func + self.__toolbox.register( + "evaluate_test_sols", + self.__get_remote(self.predict_func), + **args_predict_func, ) def __register_map(self): @@ -381,14 +404,16 @@ def mapper(f, individuals, toolbox_ref): fitnesses = list(chain(*ray.get(fitnesses))) return fitnesses - toolbox_ref = ray.put(self.toolbox) - self.toolbox.register("map", mapper, toolbox_ref=toolbox_ref) + toolbox_ref = ray.put(self.__toolbox) + self.__toolbox.register("map", mapper, toolbox_ref=toolbox_ref) + # @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" + X, y = self._validate_data(X, y, accept_sparse=False) if not hasattr(self, "_is_fitted"): - self.data_store = dict() + self.__data_store = dict() if self.common_data is not None: # FIXME: does everything work when the functions do not have common args? @@ -398,32 +423,32 @@ def fit(self, X, y=None, X_val=None, y_val=None): self.__creator_toolbox_config() # Initialize variables for statistics - self.stats_fit = tools.Statistics(lambda ind: ind.fitness.values) - self.stats_size = tools.Statistics(len) - self.mstats = tools.MultiStatistics( - fitness=self.stats_fit, size=self.stats_size + self.__stats_fit = tools.Statistics(lambda ind: ind.fitness.values) + self.__stats_size = tools.Statistics(len) + self.__mstats = tools.MultiStatistics( + fitness=self.__stats_fit, size=self.__stats_size ) - self.mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) - self.mstats.register("std", lambda ind: np.around(np.std(ind), 4)) - self.mstats.register("min", lambda ind: np.around(np.min(ind), 4)) - self.mstats.register("max", lambda ind: np.around(np.max(ind), 4)) + self.__mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) + self.__mstats.register("std", lambda ind: np.around(np.std(ind), 4)) + self.__mstats.register("min", lambda ind: np.around(np.min(ind), 4)) + self.__mstats.register("max", lambda ind: np.around(np.max(ind), 4)) self.__init_logbook() - self.train_fit_history = [] + self.__train_fit_history = [] # Create history object to build the genealogy tree - self.history = tools.History() + self.__history = tools.History() if self.plot_best_genealogy: # Decorators for history - self.toolbox.decorate("mate", self.history.decorator) - self.toolbox.decorate("mutate", self.history.decorator) + self.__toolbox.decorate("mate", self.__history.decorator) + self.__toolbox.decorate("mutate", self.__history.decorator) self.__register_map() - self.plot_initialized = False - self.fig_id = 0 + self.__plot_initialized = False + self.__fig_id = 0 train_data = {"X": X, "y": y} if self.validate and X_val is not None: @@ -439,27 +464,37 @@ def fit(self, X, y=None, X_val=None, y_val=None): self._is_fitted = True return self - def predict(self, X_test): - test_data = {"X": X_test} + def predict(self, X): + check_is_fitted(self) + X = self._validate_data(X, accept_sparse=False, reset=False) + test_data = {"X": X} datasets = {"test": test_data} self.__store_datasets(datasets) - self.__register_predict_func() - u_best = self.toolbox.map(self.toolbox.evaluate_test_sols, (self.best,))[0] + if not hasattr(self, "_predict_func_registered"): + self.__register_predict_func() + self._predict_func_registered = True + u_best = self.__toolbox.map(self.__toolbox.evaluate_test_sols, (self.__best,))[ + 0 + ] return u_best def score(self, X, y): """Computes the error metric (passed to the `GPSymbolicRegressor` constructor) on a given dataset. """ + check_is_fitted(self) + X, y = self._validate_data(X, y, accept_sparse=False, reset=False) test_data = {"X": X, "y": y} datasets = {"test": test_data} self.__store_datasets(datasets) self.__register_score_func() - score = self.toolbox.map(self.toolbox.evaluate_test_score, (self.best,))[0] + score = self.__toolbox.map(self.__toolbox.evaluate_test_score, (self.__best,))[ + 0 + ] return score def __immigration(self, pop, num_immigrants: int): - immigrants = self.toolbox.population(n=num_immigrants) + immigrants = self.__toolbox.population(n=num_immigrants) for i in range(num_immigrants): idx_individual_to_replace = random.randint(0, self.NINDIVIDUALS - 1) pop[idx_individual_to_replace] = immigrants[i] @@ -486,11 +521,11 @@ def __local_search( for i in range(self.num_islands): # select N best individuals for refinement - sel_individuals = tools.selBest(self.pop[i], k=n_inds_to_refine) + sel_individuals = tools.selBest(self.__pop[i], k=n_inds_to_refine) # store indices of best individuals in the population idx_ind = [ - self.pop[i].index(sel_individuals[j]) for j in range(n_inds_to_refine) + self.__pop[i].index(sel_individuals[j]) for j in range(n_inds_to_refine) ] # initialize best-so-far individuals and fitnesses with the @@ -498,16 +533,16 @@ def __local_search( best_so_far_fits = [ sel_individuals[j].fitness.values[0] for j in range(n_inds_to_refine) ] - best_so_far_inds = self.toolbox.clone(sel_individuals) + best_so_far_inds = self.__toolbox.clone(sel_individuals) for _ in range(n_iter): - mutants = self.toolbox.clone(best_so_far_inds) + mutants = self.__toolbox.clone(best_so_far_inds) # generate mutations for each of the best individuals mut_ind = [ [ gp.mixedMutate( mutants[j], - self.toolbox.expr_mut, + self.__toolbox.expr_mut, self.pset, [0.4, 0.3, 0.3], )[0] @@ -517,8 +552,8 @@ def __local_search( ] for j in range(n_inds_to_refine): # evaluate fitnesses of mutated individuals - fitness_mutated_inds = self.toolbox.map( - self.toolbox.evaluate_train, mut_ind[j] + fitness_mutated_inds = self.__toolbox.map( + self.__toolbox.evaluate_train, mut_ind[j] ) # assign fitnesses to mutated individuals @@ -535,7 +570,7 @@ def __local_search( # replace individuals with refined ones (if improved) for j in range(n_inds_to_refine): - self.pop[i][idx_ind[j]] = best_so_far_inds[j] + self.__pop[i][idx_ind[j]] = best_so_far_inds[j] def __evolve_islands(self, cgen: int): num_evals = 0 @@ -548,19 +583,19 @@ def __evolve_islands(self, cgen: int): if self.immigration_enabled: if cgen % self.immigration_freq == 0: self.__immigration( - self.pop[i], int(self.immigration_frac * self.NINDIVIDUALS) + self.__pop[i], int(self.immigration_frac * self.NINDIVIDUALS) ) # Select the parents for the offspring offsprings[i] = list( - map(self.toolbox.clone, self.toolbox.select(self.pop[i])) + map(self.__toolbox.clone, self.__toolbox.select(self.__pop[i])) ) # Apply crossover and mutation to the offspring with elitism elite_inds[i] = tools.selBest(offsprings[i], self.n_elitist) offsprings[i] = elite_inds[i] + algorithms.varOr( offsprings[i], - self.toolbox, + self.__toolbox, self.NINDIVIDUALS - self.n_elitist, self.crossover_prob, self.MUTPB, @@ -574,8 +609,8 @@ def __evolve_islands(self, cgen: int): if self.preprocess_func is not None: self.preprocess_func(invalid_inds[i]) - fitnesses = self.toolbox.map( - self.toolbox.evaluate_train, self.__flatten_list(invalid_inds) + fitnesses = self.__toolbox.map( + self.__toolbox.evaluate_train, self.__flatten_list(invalid_inds) ) fitnesses = self.__unflatten_list(fitnesses, [len(i) for i in invalid_inds]) @@ -589,17 +624,17 @@ def __evolve_islands(self, cgen: int): # survival selection if not self.overlapping_generation: # The population is entirely replaced by the offspring - self.pop[i][:] = offsprings[i] + self.__pop[i][:] = offsprings[i] else: # parents and offspring compete for survival (truncation selection) - self.pop[i] = tools.selBest( - self.pop[i] + offsprings[i], self.NINDIVIDUALS + self.__pop[i] = tools.selBest( + self.__pop[i] + offsprings[i], self.NINDIVIDUALS ) # migrations among islands if cgen % self.mig_frac == 0 and self.num_islands > 1: migRing( - self.pop, + self.__pop, int(self.mig_frac * self.NINDIVIDUALS), selection=random.sample, ) @@ -613,20 +648,20 @@ def __run(self): # Generate initial population print("Generating initial population(s)...", flush=True) - self.pop = [None] * self.num_islands + self.__pop = [None] * self.num_islands for i in range(self.num_islands): - self.pop[i] = self.toolbox.population(n=self.NINDIVIDUALS) + self.__pop[i] = self.__toolbox.population(n=self.NINDIVIDUALS) print("DONE.", flush=True) if self.plot_best_genealogy: # Populate the history and the Hall Of Fame of the first island - self.history.update(self.pop[0]) + self.__history.update(self.__pop[0]) # Seeds the first island with individuals if self.seed is not None: print("Seeding population with individuals...", flush=True) - self.pop[0][: len(self.seed)] = self.seed + self.__pop[0][: len(self.seed)] = self.seed print(" -= START OF EVOLUTION =- ", flush=True) @@ -634,15 +669,15 @@ def __run(self): print("Evaluating initial population(s)...", flush=True) if self.preprocess_func is not None: - self.preprocess_func(self.pop) + self.preprocess_func(self.__pop) for i in range(self.num_islands): - fitnesses = self.toolbox.map(self.toolbox.evaluate_train, self.pop[i]) + fitnesses = self.__toolbox.map(self.__toolbox.evaluate_train, self.__pop[i]) if self.callback_func is not None: - self.callback_func(self.pop[i], fitnesses) + self.callback_func(self.__pop[i], fitnesses) else: - for ind, fit in zip(self.pop[i], fitnesses): + for ind, fit in zip(self.__pop[i], fitnesses): ind.fitness.values = fit if self.validate: @@ -651,18 +686,18 @@ def __run(self): print("DONE.", flush=True) for gen in range(self.NGEN): - self.cgen = gen + 1 + self.__cgen = gen + 1 - num_evals = self.__evolve_islands(self.cgen) + num_evals = self.__evolve_islands(self.__cgen) # select the best individuals in the current population # (including all islands) best_inds = tools.selBest( - self.__flatten_list(self.pop), k=self.num_best_inds_str + self.__flatten_list(self.__pop), k=self.num_best_inds_str ) # compute and print population statistics (including all islands) - self.__stats(self.__flatten_list(self.pop), self.cgen, num_evals) + self.__stats(self.__flatten_list(self.__pop), self.__cgen, num_evals) if self.print_log: print("Best individuals of this generation:", flush=True) @@ -670,48 +705,48 @@ def __run(self): print(str(best_inds[i])) # Update history of best fitness and best validation error - self.train_fit_history = self.logbook.chapters["fitness"].select("min") + self.__train_fit_history = self.__logbook.chapters["fitness"].select("min") if self.validate: - self.val_fit_history = self.logbook.chapters["valid"].select( + self.val_fit_history = self.__logbook.chapters["valid"].select( "valid_fit" ) - self.val_fit_history = self.logbook.chapters["valid"].select( + self.val_fit_history = self.__logbook.chapters["valid"].select( "valid_fit" ) self.min_valerr = min(self.val_fit_history) if self.plot_history and ( - self.cgen % self.plot_freq == 0 or self.cgen == 1 + self.__cgen % self.plot_freq == 0 or self.__cgen == 1 ): self.__plot_history() if ( self.plot_best - and (self.toolbox.plot_best_func is not None) + and (self.__toolbox.plot_best_func is not None) and ( - self.cgen % self.plot_freq == 0 - or self.cgen == 1 - or self.cgen == self.NGEN + self.__cgen % self.plot_freq == 0 + or self.__cgen == 1 + or self.__cgen == self.NGEN ) ): - self.toolbox.plot_best_func(best_inds[0]) + self.__toolbox.plot_best_func(best_inds[0]) - self.best = best_inds[0] - if self.best.fitness.values[0] <= 1e-15: + self.__best = best_inds[0] + if self.__best.fitness.values[0] <= 1e-15: print("EARLY STOPPING.") break - self.plot_initialized = False + self.__plot_initialized = False print(" -= END OF EVOLUTION =- ", flush=True) - print(f"The best individual is {self.best}", flush=True) - print(f"The best fitness on the training set is {self.train_fit_history[-1]}") + print(f"The best individual is {self.__best}", flush=True) + print(f"The best fitness on the training set is {self.__train_fit_history[-1]}") if self.validate: print(f"The best fitness on the validation set is {self.min_valerr}") if self.plot_best_genealogy: - self.__plot_genealogy(self.best) + self.__plot_genealogy(self.__best) if self.plot_best_individual_tree: self.__plot_best_individual_tree() @@ -728,7 +763,7 @@ def __run(self): def __plot_best_individual_tree(self): """Plots the tree of the best individual at the end of the evolution.""" - nodes, edges, labels = gp.graph(self.best) + nodes, edges, labels = gp.graph(self.__best) graph = nx.Graph() graph.add_nodes_from(nodes) graph.add_edges_from(edges) @@ -743,11 +778,11 @@ def __plot_best_individual_tree(self): def __save_best_individual(self, output_path: str): """Saves the string of the best individual of the population in a .txt file.""" file = open(join(output_path, "best_ind.txt"), "w") - file.write(str(self.best)) + file.write(str(self.__best)) file.close() def __save_train_fit_history(self, output_path: str): - np.save(join(output_path, "train_fit_history.npy"), self.train_fit_history) + np.save(join(output_path, "train_fit_history.npy"), self.__train_fit_history) if self.validate: np.save(join(output_path, "val_fit_history.npy"), self.val_fit_history) diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index b59f3b1..53ee32c 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -1,7 +1,6 @@ import yaml from .primitives import add_primitives_to_pset from importlib import import_module -import ray def add_primitives_to_pset_from_dict(pset, primitives_dict): @@ -94,16 +93,19 @@ def detect_nested_trigonometric_functions(equation): return nested -@ray.remote def dummy_fitness(individuals_str, toolbox, X, y): fitnesses = [(0.0,)] * len(individuals_str) return fitnesses -@ray.remote def dummy_score(individuals_str, toolbox, X, y): MSE = [0.0] * len(individuals_str) return MSE + + +def dummy_predict(individuals_str, toolbox, X): + pred = [0.0] * len(individuals_str) + return pred diff --git a/tests/test_regressor.py b/tests/test_regressor.py index 98f4cdd..c9de6c0 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -4,7 +4,7 @@ from deap import gp from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split, GridSearchCV -from alpine.gp.util import dummy_fitness, dummy_score +from alpine.gp.util import dummy_fitness, dummy_score, dummy_predict def test_regressor(): @@ -37,9 +37,9 @@ def test_regressor(): gpsr = GPSymbolicRegressor( pset=pset, - fitness=dummy_fitness.remote, - error_metric=dummy_score.remote, - predict_func=None, + fitness=dummy_fitness, + error_metric=dummy_score, + predict_func=dummy_predict, common_data=common_data, NINDIVIDUALS=100, num_islands=10, From 0ea6cf4adced579a99bc57dbca7df4f29409a14c Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 17 Mar 2025 16:40:16 +0100 Subject: [PATCH 15/23] [WIP] Still problems with toolbox and map. --- bench/bench.py | 2 +- examples/simple_sr.py | 2 +- examples/simple_sr_noyaml.py | 2 +- src/alpine/gp/regressor.py | 341 ++++++++++++++++++----------------- src/alpine/gp/util.py | 11 ++ tests/test_basic_sr.py | 2 +- tests/test_poisson1d.py | 2 +- tests/test_regressor.py | 2 +- 8 files changed, 192 insertions(+), 172 deletions(-) diff --git a/bench/bench.py b/bench/bench.py index 7432b1d..74a040f 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -237,7 +237,7 @@ def eval(problem, cfgfile, seed=42): common_params = {"penalty": penalty, "fitness_scale": fitness_scale} gpsr = gps.GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=compute_attributes.remote, predict_func=predict.remote, error_metric=compute_MSEs.remote, diff --git a/examples/simple_sr.py b/examples/simple_sr.py index ecfc338..320ba42 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -120,7 +120,7 @@ def main(): common_data = {"penalty": penalty} gpsr = GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=fitness.remote, error_metric=score.remote, predict_func=predict.remote, diff --git a/examples/simple_sr_noyaml.py b/examples/simple_sr_noyaml.py index 32f8460..cc0462b 100644 --- a/examples/simple_sr_noyaml.py +++ b/examples/simple_sr_noyaml.py @@ -127,7 +127,7 @@ def main(): common_data = {"penalty": penalty} gpsr = GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=fitness.remote, error_metric=score.remote, predict_func=predict.remote, diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 9f460c1..21a6fd3 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -10,14 +10,10 @@ import os import ray import random -from itertools import chain +from alpine.gp.util import mapper, add_primitives_to_pset_from_dict from sklearn.base import BaseEstimator, RegressorMixin, _fit_context -from sklearn.utils.validation import ( - check_is_fitted, - validate_data, - check_array, - check_X_y, -) +from sklearn.utils.validation import check_is_fitted + # reducing the number of threads launched by fitness evaluations os.environ["MKL_NUM_THREADS"] = "1" @@ -67,12 +63,12 @@ class GPSymbolicRegressor(RegressorMixin, BaseEstimator): def __init__( self, - pset: gp.PrimitiveSet | gp.PrimitiveSetTyped, + pset_config: gp.PrimitiveSet | gp.PrimitiveSetTyped, fitness: Callable, select_fun: str = "tools.selection.tournament_with_elitism", select_args: str = "{'num_elitist': self.n_elitist, 'tournsize': 3, 'stochastic_tourn': { 'enabled': False, 'prob': [0.8, 0.2] }}", # noqa: E501 mut_fun: str = "gp.mutUniform", - mut_args: str = "{'expr': self._GPSymbolicRegressor__toolbox.expr_mut, 'pset': self.pset}", + mut_args: str = "{'expr': toolbox.expr_mut, 'pset': pset}", expr_mut_fun: str = "gp.genHalfAndHalf", expr_mut_args: str = "{'min_': 1, 'max_': 3}", crossover_fun: str = "gp.cxOnePoint", @@ -111,7 +107,7 @@ def __init__( batch_size=1, ): super().__init__() - self.pset = pset + self.pset_config = pset_config self.fitness = fitness self.error_metric = error_metric @@ -170,45 +166,68 @@ def n_elitist(self): def get_params(self, deep=True): return self.__dict__ - def __creator_toolbox_config(self): + def __pset_config(self): + pset = gp.PrimitiveSetTyped( + "MAIN", + [ + float, + ], + float, + ) + pset.renameArguments(ARG0="x") + primitives = { + "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, + "used": [ + {"name": "add", "dimension": None, "rank": None}, + {"name": "sub", "dimension": None, "rank": None}, + {"name": "mul", "dimension": None, "rank": None}, + {"name": "div", "dimension": None, "rank": None}, + {"name": "sin", "dimension": None, "rank": None}, + {"name": "cos", "dimension": None, "rank": None}, + {"name": "exp", "dimension": None, "rank": None}, + {"name": "log", "dimension": None, "rank": None}, + ], + } + + pset = add_primitives_to_pset_from_dict(pset, primitives) + return pset + + def __creator_toolbox_pset_config(self): """Initialize toolbox and individual creator based on config file.""" - self.__toolbox = base.Toolbox() + pset = self.__pset_config() + toolbox = base.Toolbox() # SELECTION - self.__toolbox.register( - "select", eval(self.select_fun), **eval(self.select_args) - ) + toolbox.register("select", eval(self.select_fun), **eval(self.select_args)) # MUTATION - self.__toolbox.register( + toolbox.register( "expr_mut", eval(self.expr_mut_fun), **eval(self.expr_mut_args) ) - self.__toolbox.register("mutate", eval(self.mut_fun), **eval(self.mut_args)) + toolbox.register("mutate", eval(self.mut_fun), **eval(self.mut_args)) # CROSSOVER - self.__toolbox.register( - "mate", eval(self.crossover_fun), **eval(self.crossover_args) - ) - self.__toolbox.decorate( + toolbox.register("mate", eval(self.crossover_fun), **eval(self.crossover_args)) + toolbox.decorate( "mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17) ) - self.__toolbox.decorate( + toolbox.decorate( "mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17) ) # INDIVIDUAL GENERATOR/CREATOR - self.__toolbox.register( + toolbox.register( "expr", gp.genHalfAndHalf, - pset=self.pset, + pset=pset, min_=self.min_height, max_=self.max_height, ) - self.__toolbox.register( + toolbox.register( "expr_pop", gp.genHalfAndHalf, - pset=self.pset, + pset=pset, min_=self.min_height, max_=self.max_height, is_pop=True, @@ -216,21 +235,20 @@ def __creator_toolbox_config(self): creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) createIndividual = creator.Individual - self.__toolbox.register( - "individual", tools.initIterate, createIndividual, self.__toolbox.expr + toolbox.register( + "individual", tools.initIterate, createIndividual, toolbox.expr ) - self.__toolbox.register( - "population", tools.initRepeat, list, self.__toolbox.individual - ) - self.__toolbox.register("compile", gp.compile, pset=self.pset) + toolbox.register("population", tools.initRepeat, list, toolbox.individual) + toolbox.register("compile", gp.compile, pset=pset) self.__createIndividual = createIndividual if self.seed is not None: self.seed = [ - self.__createIndividual.from_string(i, self.pset) for i in self.seed + self.__createIndividual.from_string(i, pset) for i in self.seed ] + return toolbox, pset def __store_fit_error_common_args(self, data: Dict): """Store names and values of the arguments that are in common between @@ -273,12 +291,12 @@ def __init_logbook(self): self.__logbook.chapters["fitness"].header = "min", "avg", "max", "std" self.__logbook.chapters["size"].header = "min", "avg", "max", "std" - def __compute_valid_stats(self, pop): + def __compute_valid_stats(self, pop, toolbox): best = tools.selBest(pop, k=1) # FIXME: ugly way of handling lists/tuples; assume eval_val_MSE returns a # single-valued tuple as eval_val_fit - valid_fit = self.__toolbox.map(self.__toolbox.evaluate_val_fit, best)[0][0] - valid_err = self.__toolbox.map(self.__toolbox.evaluate_val_MSE, best)[0] + valid_fit = toolbox.map(toolbox.evaluate_val_fit, best)[0][0] + valid_err = toolbox.map(toolbox.evaluate_val_MSE, best)[0] return valid_fit, valid_err @@ -356,99 +374,92 @@ def __plot_genealogy(self, best): def __get_remote(self, f): return (ray.remote(f)).remote - def __register_fitness_func(self): + def __register_fitness_func(self, toolbox): store = self.__data_store args_train = store["common"] | store["train"] - self.__toolbox.register( + toolbox.register( "evaluate_train", self.__get_remote(self.fitness), **args_train ) - def __register_val_funcs(self): + def __register_val_funcs(self, toolbox): """Register the functions needed for validation, i.e. the error metric and the fitness function. Must be called after storing the datasets in the common obj space. """ store = self.__data_store args_val = store["common"] | store["val"] - self.__toolbox.register( + toolbox.register( "evaluate_val_fit", self.__get_remote(self.fitness), **args_val ) - self.__toolbox.register( + toolbox.register( "evaluate_val_MSE", self.__get_remote(self.error_metric), **args_val ) - def __register_score_func(self): + def __register_score_func(self, toolbox): store = self.__data_store args_score_func = store["common"] | store["test"] - self.__toolbox.register( + toolbox.register( "evaluate_test_score", self.__get_remote(self.error_metric), **args_score_func, ) - def __register_predict_func(self): + def __register_predict_func(self, toolbox): store = self.__data_store args_predict_func = store["common"] | store["test"] - self.__toolbox.register( + toolbox.register( "evaluate_test_sols", self.__get_remote(self.predict_func), **args_predict_func, ) - def __register_map(self): - def mapper(f, individuals, toolbox_ref): - fitnesses = [] * len(individuals) - for i in range(0, len(individuals), self.batch_size): - individuals_batch = individuals[i : i + self.batch_size] - fitnesses.append(f(individuals_batch, toolbox_ref)) - fitnesses = list(chain(*ray.get(fitnesses))) - return fitnesses - - toolbox_ref = ray.put(self.__toolbox) - self.__toolbox.register("map", mapper, toolbox_ref=toolbox_ref) + def __register_map(self, toolbox): + toolbox_ref = ray.put(toolbox) + toolbox.register( + "map", mapper, toolbox_ref=toolbox_ref, batch_size=self.batch_size + ) # @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" X, y = self._validate_data(X, y, accept_sparse=False) - if not hasattr(self, "_is_fitted"): - self.__data_store = dict() + # config individual creator and toolbox + toolbox, pset = self.__creator_toolbox_pset_config() - if self.common_data is not None: - # FIXME: does everything work when the functions do not have common args? - self.__store_fit_error_common_args(self.common_data) + self.__data_store = dict() - # config individual creator and toolbox - self.__creator_toolbox_config() + if self.common_data is not None: + # FIXME: does everything work when the functions do not have common args? + self.__store_fit_error_common_args(self.common_data) - # Initialize variables for statistics - self.__stats_fit = tools.Statistics(lambda ind: ind.fitness.values) - self.__stats_size = tools.Statistics(len) - self.__mstats = tools.MultiStatistics( - fitness=self.__stats_fit, size=self.__stats_size - ) - self.__mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) - self.__mstats.register("std", lambda ind: np.around(np.std(ind), 4)) - self.__mstats.register("min", lambda ind: np.around(np.min(ind), 4)) - self.__mstats.register("max", lambda ind: np.around(np.max(ind), 4)) + # Initialize variables for statistics + self.__stats_fit = tools.Statistics(lambda ind: ind.fitness.values) + self.__stats_size = tools.Statistics(len) + self.__mstats = tools.MultiStatistics( + fitness=self.__stats_fit, size=self.__stats_size + ) + self.__mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) + self.__mstats.register("std", lambda ind: np.around(np.std(ind), 4)) + self.__mstats.register("min", lambda ind: np.around(np.min(ind), 4)) + self.__mstats.register("max", lambda ind: np.around(np.max(ind), 4)) - self.__init_logbook() + self.__init_logbook() - self.__train_fit_history = [] + self.__train_fit_history = [] - # Create history object to build the genealogy tree - self.__history = tools.History() + # Create history object to build the genealogy tree + self.__history = tools.History() - if self.plot_best_genealogy: - # Decorators for history - self.__toolbox.decorate("mate", self.__history.decorator) - self.__toolbox.decorate("mutate", self.__history.decorator) + if self.plot_best_genealogy: + # Decorators for history + toolbox.decorate("mate", self.__history.decorator) + toolbox.decorate("mutate", self.__history.decorator) - self.__register_map() + self.__register_map(toolbox) - self.__plot_initialized = False - self.__fig_id = 0 + self.__plot_initialized = False + self.__fig_id = 0 train_data = {"X": X, "y": y} if self.validate and X_val is not None: @@ -457,25 +468,25 @@ def fit(self, X, y=None, X_val=None, y_val=None): else: datasets = {"train": train_data} self.__store_datasets(datasets) - self.__register_fitness_func() + self.__register_fitness_func(toolbox) if self.validate and self.error_metric is not None: - self.__register_val_funcs() - self.__run() + self.__register_val_funcs(toolbox) + self.__run(toolbox) self._is_fitted = True return self def predict(self, X): check_is_fitted(self) + toolbox, pset = self.__creator_toolbox_pset_config() X = self._validate_data(X, accept_sparse=False, reset=False) test_data = {"X": X} datasets = {"test": test_data} self.__store_datasets(datasets) if not hasattr(self, "_predict_func_registered"): - self.__register_predict_func() + self.__register_predict_func(toolbox) self._predict_func_registered = True - u_best = self.__toolbox.map(self.__toolbox.evaluate_test_sols, (self.__best,))[ - 0 - ] + u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__best,))[0] + # u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__best,)) return u_best def score(self, X, y): @@ -483,18 +494,18 @@ def score(self, X, y): on a given dataset. """ check_is_fitted(self) + toolbox, pset = self.__creator_toolbox_pset_config() X, y = self._validate_data(X, y, accept_sparse=False, reset=False) test_data = {"X": X, "y": y} datasets = {"test": test_data} self.__store_datasets(datasets) - self.__register_score_func() - score = self.__toolbox.map(self.__toolbox.evaluate_test_score, (self.__best,))[ - 0 - ] + self.__register_score_func(toolbox) + score = toolbox.map(toolbox.evaluate_test_score, (self.__best,))[0] + # score = toolbox.map(toolbox.evaluate_test_score, (self.__best,)) return score - def __immigration(self, pop, num_immigrants: int): - immigrants = self.__toolbox.population(n=num_immigrants) + def __immigration(self, pop, num_immigrants: int, toolbox): + immigrants = toolbox.population(n=num_immigrants) for i in range(num_immigrants): idx_individual_to_replace = random.randint(0, self.NINDIVIDUALS - 1) pop[idx_individual_to_replace] = immigrants[i] @@ -515,64 +526,64 @@ def __unflatten_list(self, flat_lst, lengths): start = end # Update the start index for the next sublist return result - def __local_search( - self, n_iter: int = 1, n_mutations: int = 500, n_inds_to_refine: int = 10 - ): - - for i in range(self.num_islands): - # select N best individuals for refinement - sel_individuals = tools.selBest(self.__pop[i], k=n_inds_to_refine) - - # store indices of best individuals in the population - idx_ind = [ - self.__pop[i].index(sel_individuals[j]) for j in range(n_inds_to_refine) - ] - - # initialize best-so-far individuals and fitnesses with the - # current individuals - best_so_far_fits = [ - sel_individuals[j].fitness.values[0] for j in range(n_inds_to_refine) - ] - best_so_far_inds = self.__toolbox.clone(sel_individuals) - - for _ in range(n_iter): - mutants = self.__toolbox.clone(best_so_far_inds) - # generate mutations for each of the best individuals - mut_ind = [ - [ - gp.mixedMutate( - mutants[j], - self.__toolbox.expr_mut, - self.pset, - [0.4, 0.3, 0.3], - )[0] - for _ in range(n_mutations) - ] - for j in range(n_inds_to_refine) - ] - for j in range(n_inds_to_refine): - # evaluate fitnesses of mutated individuals - fitness_mutated_inds = self.__toolbox.map( - self.__toolbox.evaluate_train, mut_ind[j] - ) - - # assign fitnesses to mutated individuals - for ind, fit in zip(mut_ind[j], fitness_mutated_inds): - ind.fitness.values = fit - - # select best mutation - best_mutation = tools.selBest(mut_ind[j], k=1)[0] - - if best_mutation.fitness.values[0] < best_so_far_fits[j]: - print("Found better individual in tabu search") - best_so_far_inds[j] = best_mutation - best_so_far_fits[j] = best_mutation.fitness.values[0] - - # replace individuals with refined ones (if improved) - for j in range(n_inds_to_refine): - self.__pop[i][idx_ind[j]] = best_so_far_inds[j] - - def __evolve_islands(self, cgen: int): + # def __local_search( + # self, n_iter: int = 1, n_mutations: int = 500, n_inds_to_refine: int = 10 + # ): + + # for i in range(self.num_islands): + # # select N best individuals for refinement + # sel_individuals = tools.selBest(self.__pop[i], k=n_inds_to_refine) + + # # store indices of best individuals in the population + # idx_ind = [ + # self.__pop[i].index(sel_individuals[j]) for j in range(n_inds_to_refine) + # ] + + # # initialize best-so-far individuals and fitnesses with the + # # current individuals + # best_so_far_fits = [ + # sel_individuals[j].fitness.values[0] for j in range(n_inds_to_refine) + # ] + # best_so_far_inds = self.__toolbox.clone(sel_individuals) + + # for _ in range(n_iter): + # mutants = self.__toolbox.clone(best_so_far_inds) + # # generate mutations for each of the best individuals + # mut_ind = [ + # [ + # gp.mixedMutate( + # mutants[j], + # self.__toolbox.expr_mut, + # self.__pset, + # [0.4, 0.3, 0.3], + # )[0] + # for _ in range(n_mutations) + # ] + # for j in range(n_inds_to_refine) + # ] + # for j in range(n_inds_to_refine): + # # evaluate fitnesses of mutated individuals + # fitness_mutated_inds = self.__toolbox.map( + # self.__toolbox.evaluate_train, mut_ind[j] + # ) + + # # assign fitnesses to mutated individuals + # for ind, fit in zip(mut_ind[j], fitness_mutated_inds): + # ind.fitness.values = fit + + # # select best mutation + # best_mutation = tools.selBest(mut_ind[j], k=1)[0] + + # if best_mutation.fitness.values[0] < best_so_far_fits[j]: + # print("Found better individual in tabu search") + # best_so_far_inds[j] = best_mutation + # best_so_far_fits[j] = best_mutation.fitness.values[0] + + # # replace individuals with refined ones (if improved) + # for j in range(n_inds_to_refine): + # self.__pop[i][idx_ind[j]] = best_so_far_inds[j] + + def __evolve_islands(self, cgen: int, toolbox): num_evals = 0 invalid_inds = [None] * self.num_islands @@ -587,15 +598,13 @@ def __evolve_islands(self, cgen: int): ) # Select the parents for the offspring - offsprings[i] = list( - map(self.__toolbox.clone, self.__toolbox.select(self.__pop[i])) - ) + offsprings[i] = list(map(toolbox.clone, toolbox.select(self.__pop[i]))) # Apply crossover and mutation to the offspring with elitism elite_inds[i] = tools.selBest(offsprings[i], self.n_elitist) offsprings[i] = elite_inds[i] + algorithms.varOr( offsprings[i], - self.__toolbox, + toolbox, self.NINDIVIDUALS - self.n_elitist, self.crossover_prob, self.MUTPB, @@ -609,8 +618,8 @@ def __evolve_islands(self, cgen: int): if self.preprocess_func is not None: self.preprocess_func(invalid_inds[i]) - fitnesses = self.__toolbox.map( - self.__toolbox.evaluate_train, self.__flatten_list(invalid_inds) + fitnesses = toolbox.map( + toolbox.evaluate_train, self.__flatten_list(invalid_inds) ) fitnesses = self.__unflatten_list(fitnesses, [len(i) for i in invalid_inds]) @@ -643,14 +652,14 @@ def __evolve_islands(self, cgen: int): return num_evals - def __run(self): + def __run(self, toolbox): """Runs symbolic regression.""" # Generate initial population print("Generating initial population(s)...", flush=True) self.__pop = [None] * self.num_islands for i in range(self.num_islands): - self.__pop[i] = self.__toolbox.population(n=self.NINDIVIDUALS) + self.__pop[i] = toolbox.population(n=self.NINDIVIDUALS) print("DONE.", flush=True) @@ -672,7 +681,7 @@ def __run(self): self.preprocess_func(self.__pop) for i in range(self.num_islands): - fitnesses = self.__toolbox.map(self.__toolbox.evaluate_train, self.__pop[i]) + fitnesses = toolbox.map(toolbox.evaluate_train, self.__pop[i]) if self.callback_func is not None: self.callback_func(self.__pop[i], fitnesses) @@ -688,7 +697,7 @@ def __run(self): for gen in range(self.NGEN): self.__cgen = gen + 1 - num_evals = self.__evolve_islands(self.__cgen) + num_evals = self.__evolve_islands(self.__cgen, toolbox) # select the best individuals in the current population # (including all islands) @@ -729,7 +738,7 @@ def __run(self): or self.__cgen == self.NGEN ) ): - self.__toolbox.plot_best_func(best_inds[0]) + toolbox.plot_best_func(best_inds[0]) self.__best = best_inds[0] if self.__best.fitness.values[0] <= 1e-15: diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index 53ee32c..e90159c 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -1,6 +1,8 @@ import yaml from .primitives import add_primitives_to_pset from importlib import import_module +from itertools import chain +import ray def add_primitives_to_pset_from_dict(pset, primitives_dict): @@ -93,6 +95,15 @@ def detect_nested_trigonometric_functions(equation): return nested +def mapper(f, individuals, toolbox_ref, batch_size): + fitnesses = [] * len(individuals) + for i in range(0, len(individuals), batch_size): + individuals_batch = individuals[i : i + batch_size] + fitnesses.append(f(individuals_batch, toolbox_ref)) + fitnesses = list(chain(*ray.get(fitnesses))) + return fitnesses + + def dummy_fitness(individuals_str, toolbox, X, y): fitnesses = [(0.0,)] * len(individuals_str) diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 6688a01..f3fe2f6 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -102,7 +102,7 @@ def test_basic_sr(set_test_dir): ] gpsr = GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=fitness.remote, error_metric=score.remote, predict_func=predict.remote, diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 433efe8..7bff105 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -206,7 +206,7 @@ def test_poisson1d(set_test_dir, yamlfile): common_params = {"S": S, "u_0": u_0, "penalty": penalty} gpsr = gps.GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=fitness.remote, error_metric=score.remote, predict_func=predict.remote, diff --git a/tests/test_regressor.py b/tests/test_regressor.py index c9de6c0..e084127 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -36,7 +36,7 @@ def test_regressor(): common_data = {} gpsr = GPSymbolicRegressor( - pset=pset, + pset_config=pset, fitness=dummy_fitness, error_metric=dummy_score, predict_func=dummy_predict, From d3bcb24c74c2c09913f0d9316f5ad48b36d158e2 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 19 Mar 2025 08:17:16 +0100 Subject: [PATCH 16/23] [WIP] Trying to rebuild toolbox in score and predict and register functions. --- src/alpine/gp/regressor.py | 66 +++----------------------------------- 1 file changed, 5 insertions(+), 61 deletions(-) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 21a6fd3..06ad7f6 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -11,7 +11,7 @@ import ray import random from alpine.gp.util import mapper, add_primitives_to_pset_from_dict -from sklearn.base import BaseEstimator, RegressorMixin, _fit_context +from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted @@ -473,11 +473,13 @@ def fit(self, X, y=None, X_val=None, y_val=None): self.__register_val_funcs(toolbox) self.__run(toolbox) self._is_fitted = True + self.__toolbox = toolbox return self def predict(self, X): check_is_fitted(self) toolbox, pset = self.__creator_toolbox_pset_config() + self.__register_map(toolbox) X = self._validate_data(X, accept_sparse=False, reset=False) test_data = {"X": X} datasets = {"test": test_data} @@ -486,7 +488,6 @@ def predict(self, X): self.__register_predict_func(toolbox) self._predict_func_registered = True u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__best,))[0] - # u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__best,)) return u_best def score(self, X, y): @@ -495,13 +496,13 @@ def score(self, X, y): """ check_is_fitted(self) toolbox, pset = self.__creator_toolbox_pset_config() + self.__register_map(toolbox) X, y = self._validate_data(X, y, accept_sparse=False, reset=False) test_data = {"X": X, "y": y} datasets = {"test": test_data} self.__store_datasets(datasets) self.__register_score_func(toolbox) score = toolbox.map(toolbox.evaluate_test_score, (self.__best,))[0] - # score = toolbox.map(toolbox.evaluate_test_score, (self.__best,)) return score def __immigration(self, pop, num_immigrants: int, toolbox): @@ -526,63 +527,6 @@ def __unflatten_list(self, flat_lst, lengths): start = end # Update the start index for the next sublist return result - # def __local_search( - # self, n_iter: int = 1, n_mutations: int = 500, n_inds_to_refine: int = 10 - # ): - - # for i in range(self.num_islands): - # # select N best individuals for refinement - # sel_individuals = tools.selBest(self.__pop[i], k=n_inds_to_refine) - - # # store indices of best individuals in the population - # idx_ind = [ - # self.__pop[i].index(sel_individuals[j]) for j in range(n_inds_to_refine) - # ] - - # # initialize best-so-far individuals and fitnesses with the - # # current individuals - # best_so_far_fits = [ - # sel_individuals[j].fitness.values[0] for j in range(n_inds_to_refine) - # ] - # best_so_far_inds = self.__toolbox.clone(sel_individuals) - - # for _ in range(n_iter): - # mutants = self.__toolbox.clone(best_so_far_inds) - # # generate mutations for each of the best individuals - # mut_ind = [ - # [ - # gp.mixedMutate( - # mutants[j], - # self.__toolbox.expr_mut, - # self.__pset, - # [0.4, 0.3, 0.3], - # )[0] - # for _ in range(n_mutations) - # ] - # for j in range(n_inds_to_refine) - # ] - # for j in range(n_inds_to_refine): - # # evaluate fitnesses of mutated individuals - # fitness_mutated_inds = self.__toolbox.map( - # self.__toolbox.evaluate_train, mut_ind[j] - # ) - - # # assign fitnesses to mutated individuals - # for ind, fit in zip(mut_ind[j], fitness_mutated_inds): - # ind.fitness.values = fit - - # # select best mutation - # best_mutation = tools.selBest(mut_ind[j], k=1)[0] - - # if best_mutation.fitness.values[0] < best_so_far_fits[j]: - # print("Found better individual in tabu search") - # best_so_far_inds[j] = best_mutation - # best_so_far_fits[j] = best_mutation.fitness.values[0] - - # # replace individuals with refined ones (if improved) - # for j in range(n_inds_to_refine): - # self.__pop[i][idx_ind[j]] = best_so_far_inds[j] - def __evolve_islands(self, cgen: int, toolbox): num_evals = 0 @@ -731,7 +675,7 @@ def __run(self, toolbox): if ( self.plot_best - and (self.__toolbox.plot_best_func is not None) + and (toolbox.plot_best_func is not None) and ( self.__cgen % self.plot_freq == 0 or self.__cgen == 1 From bc568fbf1d629ffca1d4521d27ac0c261b17e0ec Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 19 Mar 2025 14:35:58 +0100 Subject: [PATCH 17/23] First running estimator check. --- src/alpine/gp/regressor.py | 40 +++++++++++++++++++++----------------- src/alpine/gp/util.py | 23 +++++++++++++++++++++- tests/test_regressor.py | 5 ++++- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 06ad7f6..8a3da0f 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -10,7 +10,15 @@ import os import ray import random -from alpine.gp.util import mapper, add_primitives_to_pset_from_dict +from alpine.gp.util import ( + mapper, + add_primitives_to_pset_from_dict, + max_func, + min_func, + avg_func, + std_func, + fitness_value, +) from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted @@ -232,8 +240,10 @@ def __creator_toolbox_pset_config(self): max_=self.max_height, is_pop=True, ) - creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) - creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) + if not hasattr(creator, "FitnessMin"): + creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) + if not hasattr(creator, "Individual"): + creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) createIndividual = creator.Individual toolbox.register( "individual", tools.initIterate, createIndividual, toolbox.expr @@ -242,12 +252,8 @@ def __creator_toolbox_pset_config(self): toolbox.register("population", tools.initRepeat, list, toolbox.individual) toolbox.register("compile", gp.compile, pset=pset) - self.__createIndividual = createIndividual - if self.seed is not None: - self.seed = [ - self.__createIndividual.from_string(i, pset) for i in self.seed - ] + self.seed = [createIndividual.from_string(i, pset) for i in self.seed] return toolbox, pset def __store_fit_error_common_args(self, data: Dict): @@ -434,15 +440,15 @@ def fit(self, X, y=None, X_val=None, y_val=None): self.__store_fit_error_common_args(self.common_data) # Initialize variables for statistics - self.__stats_fit = tools.Statistics(lambda ind: ind.fitness.values) + self.__stats_fit = tools.Statistics(fitness_value) self.__stats_size = tools.Statistics(len) self.__mstats = tools.MultiStatistics( fitness=self.__stats_fit, size=self.__stats_size ) - self.__mstats.register("avg", lambda ind: np.around(np.mean(ind), 4)) - self.__mstats.register("std", lambda ind: np.around(np.std(ind), 4)) - self.__mstats.register("min", lambda ind: np.around(np.min(ind), 4)) - self.__mstats.register("max", lambda ind: np.around(np.max(ind), 4)) + self.__mstats.register("avg", avg_func) + self.__mstats.register("std", std_func) + self.__mstats.register("min", min_func) + self.__mstats.register("max", max_func) self.__init_logbook() @@ -473,7 +479,6 @@ def fit(self, X, y=None, X_val=None, y_val=None): self.__register_val_funcs(toolbox) self.__run(toolbox) self._is_fitted = True - self.__toolbox = toolbox return self def predict(self, X): @@ -484,10 +489,8 @@ def predict(self, X): test_data = {"X": X} datasets = {"test": test_data} self.__store_datasets(datasets) - if not hasattr(self, "_predict_func_registered"): - self.__register_predict_func(toolbox) - self._predict_func_registered = True - u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__best,))[0] + self.__register_predict_func(toolbox) + u_best = toolbox.map(toolbox.evaluate_test_sols, (self.__str_best,))[0] return u_best def score(self, X, y): @@ -685,6 +688,7 @@ def __run(self, toolbox): toolbox.plot_best_func(best_inds[0]) self.__best = best_inds[0] + self.__str_best = str(self.__best) if self.__best.fitness.values[0] <= 1e-15: print("EARLY STOPPING.") break diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index e90159c..8344bcc 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -3,6 +3,7 @@ from importlib import import_module from itertools import chain import ray +import numpy as np def add_primitives_to_pset_from_dict(pset, primitives_dict): @@ -118,5 +119,25 @@ def dummy_score(individuals_str, toolbox, X, y): def dummy_predict(individuals_str, toolbox, X): - pred = [0.0] * len(individuals_str) + pred = [np.zeros(len(X))] * len(individuals_str) return pred + + +def fitness_value(ind): + return ind.fitness.values + + +def avg_func(values): + return np.around(np.mean(values), 4) + + +def std_func(values): + return np.around(np.std(values), 4) + + +def min_func(values): + return np.around(np.min(values), 4) + + +def max_func(values): + return np.around(np.max(values), 4) diff --git a/tests/test_regressor.py b/tests/test_regressor.py index e084127..179d02d 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -54,7 +54,10 @@ def test_regressor(): ) print(gpsr.get_params()) - check_estimator(gpsr) + check_estimator( + gpsr, + expected_failed_checks={"check_regressors_train": "dummy model"}, + ) # # Generate synthetic data # X, y = make_regression(n_samples=100, n_features=10, random_state=42) From 0c518bbfd43b7fb40e147e618d3f79381ae19bdd Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 19 Mar 2025 15:20:32 +0100 Subject: [PATCH 18/23] Working on tests. --- environment.yaml | 3 ++- src/alpine/gp/util.py | 4 ++++ tests/conftest.py | 10 +++++----- tests/test_basic_sr.py | 21 +++++++-------------- tests/test_regressor.py | 9 +++++---- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/environment.yaml b/environment.yaml index 4e938ca..063e85c 100644 --- a/environment.yaml +++ b/environment.yaml @@ -8,7 +8,8 @@ dependencies: - jaxopt - numpy - pygmo - - python==3.12 + - python +# - python==3.12 - python-gmsh - trame - ipywidgets diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index 8344bcc..4d747e2 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -123,6 +123,10 @@ def dummy_predict(individuals_str, toolbox, X): return pred +def compile_individuals(toolbox, individuals_str_batch): + return [toolbox.compile(expr=ind) for ind in individuals_str_batch] + + def fitness_value(ind): return ind.fitness.values diff --git a/tests/conftest.py b/tests/conftest.py index ad43d80..e907130 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,10 @@ """ - Dummy conftest.py for alpine. +Dummy conftest.py for alpine. - If you don't know what this is for, just leave it empty. - Read more about conftest.py under: - - https://docs.pytest.org/en/stable/fixture.html - - https://docs.pytest.org/en/stable/writing_plugins.html +If you don't know what this is for, just leave it empty. +Read more about conftest.py under: +- https://docs.pytest.org/en/stable/fixture.html +- https://docs.pytest.org/en/stable/writing_plugins.html """ import pytest diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index f3fe2f6..b058fc7 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -12,11 +12,7 @@ config() -def compile_individuals(toolbox, individuals_str_batch): - return [toolbox.compile(expr=ind) for ind in individuals_str_batch] - - -x = jnp.array([x / 10.0 for x in range(-10, 10)]) +x = jnp.array([x / 10.0 for x in range(-10, 10)]).reshape(-1, 1) y = x**4 + x**3 + x**2 + x @@ -37,10 +33,9 @@ def eval_MSE_sol(individual, X, y): return MSE, y_pred -@ray.remote def predict(individuals_str, toolbox, X): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) @@ -50,10 +45,9 @@ def predict(individuals_str, toolbox, X): return u -@ray.remote def score(individuals_str, toolbox, X, y): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) @@ -63,9 +57,8 @@ def score(individuals_str, toolbox, X, y): return MSE -@ray.remote def fitness(individuals_str, toolbox, X, y): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) fitnesses = [None] * len(individuals_str) for i, ind in enumerate(callables): @@ -103,9 +96,9 @@ def test_basic_sr(set_test_dir): gpsr = GPSymbolicRegressor( pset_config=pset, - fitness=fitness.remote, - error_metric=score.remote, - predict_func=predict.remote, + fitness=fitness, + error_metric=score, + predict_func=predict, common_data=common_data, seed=seed, batch_size=10, diff --git a/tests/test_regressor.py b/tests/test_regressor.py index 179d02d..e86363f 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -2,12 +2,13 @@ from alpine.gp.regressor import GPSymbolicRegressor from alpine.gp import util from deap import gp -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split, GridSearchCV + +# from sklearn.datasets import make_regression +# from sklearn.model_selection import train_test_split, GridSearchCV from alpine.gp.util import dummy_fitness, dummy_score, dummy_predict -def test_regressor(): +def test_check_regressor(): pset = gp.PrimitiveSetTyped( "MAIN", [ @@ -83,4 +84,4 @@ def test_regressor(): if __name__ == "__main__": - test_regressor() + test_check_regressor() From 0b9d9827568fa49e4188d29620168143f4bd5e8e Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Wed, 19 Mar 2025 15:53:22 +0100 Subject: [PATCH 19/23] First test almost working, probably problem with data shapes. --- bench/bench.py | 2 +- src/alpine/gp/regressor.py | 66 ++++++++++++++++++++------------------ tests/test_basic_sr.py | 2 +- tests/test_poisson1d.py | 2 +- 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/bench/bench.py b/bench/bench.py index 74a040f..8fb27a0 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -247,7 +247,7 @@ def eval(problem, cfgfile, seed=42): num_best_inds_str=1, save_best_individual=False, output_path="./", - seed=None, + seed_str=None, batch_size=batch_size, **regressor_params, ) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index 8a3da0f..d319785 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -101,7 +101,7 @@ def __init__( validate: bool = False, preprocess_func: Callable | None = None, callback_func: Callable | None = None, - seed: List[str] | None = None, + seed_str: List[str] | None = None, plot_history: bool = False, print_log: bool = False, num_best_inds_str: int = 1, @@ -165,7 +165,7 @@ def __init__( self.frac_elitist = frac_elitist - self.seed = seed + self.seed_str = seed_str @property def n_elitist(self): @@ -174,35 +174,35 @@ def n_elitist(self): def get_params(self, deep=True): return self.__dict__ - def __pset_config(self): - pset = gp.PrimitiveSetTyped( - "MAIN", - [ - float, - ], - float, - ) - pset.renameArguments(ARG0="x") - primitives = { - "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, - "used": [ - {"name": "add", "dimension": None, "rank": None}, - {"name": "sub", "dimension": None, "rank": None}, - {"name": "mul", "dimension": None, "rank": None}, - {"name": "div", "dimension": None, "rank": None}, - {"name": "sin", "dimension": None, "rank": None}, - {"name": "cos", "dimension": None, "rank": None}, - {"name": "exp", "dimension": None, "rank": None}, - {"name": "log", "dimension": None, "rank": None}, - ], - } - - pset = add_primitives_to_pset_from_dict(pset, primitives) - return pset + # def __pset_config(self): + # pset = gp.PrimitiveSetTyped( + # "MAIN", + # [ + # float, + # ], + # float, + # ) + # pset.renameArguments(ARG0="x") + # primitives = { + # "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, + # "used": [ + # {"name": "add", "dimension": None, "rank": None}, + # {"name": "sub", "dimension": None, "rank": None}, + # {"name": "mul", "dimension": None, "rank": None}, + # {"name": "div", "dimension": None, "rank": None}, + # {"name": "sin", "dimension": None, "rank": None}, + # {"name": "cos", "dimension": None, "rank": None}, + # {"name": "exp", "dimension": None, "rank": None}, + # {"name": "log", "dimension": None, "rank": None}, + # ], + # } + + # pset = add_primitives_to_pset_from_dict(pset, primitives) + # return pset def __creator_toolbox_pset_config(self): """Initialize toolbox and individual creator based on config file.""" - pset = self.__pset_config() + pset = self.pset_config toolbox = base.Toolbox() # SELECTION @@ -252,8 +252,10 @@ def __creator_toolbox_pset_config(self): toolbox.register("population", tools.initRepeat, list, toolbox.individual) toolbox.register("compile", gp.compile, pset=pset) - if self.seed is not None: - self.seed = [createIndividual.from_string(i, pset) for i in self.seed] + if self.seed_str is not None: + self.seed_ind = [ + createIndividual.from_string(i, pset) for i in self.seed_str + ] return toolbox, pset def __store_fit_error_common_args(self, data: Dict): @@ -615,9 +617,9 @@ def __run(self, toolbox): self.__history.update(self.__pop[0]) # Seeds the first island with individuals - if self.seed is not None: + if self.seed_ind is not None: print("Seeding population with individuals...", flush=True) - self.__pop[0][: len(self.seed)] = self.seed + self.__pop[0][: len(self.seed_ind)] = self.seed_ind print(" -= START OF EVOLUTION =- ", flush=True) diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index b058fc7..5dbb9c8 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -100,7 +100,7 @@ def test_basic_sr(set_test_dir): error_metric=score, predict_func=predict, common_data=common_data, - seed=seed, + seed_str=seed, batch_size=10, **regressor_params ) diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 7bff105..6a4b0e8 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -212,7 +212,7 @@ def test_poisson1d(set_test_dir, yamlfile): predict_func=predict.remote, print_log=True, common_data=common_params, - seed=seed_str, + seed_str=seed_str, plot_history=False, save_best_individual=True, save_train_fit_history=True, From dbcd96bbccc3ce0d232bee12a1f8f5cf3def2b3b Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 31 Mar 2025 16:58:23 +0200 Subject: [PATCH 20/23] [WIP] Fixing tests. --- examples/simple_sr.py | 6 ++-- src/alpine/gp/regressor.py | 33 ++----------------- tests/test_basic_sr.py | 6 ++-- tests/test_poisson1d.py | 65 +++++++++++++++----------------------- 4 files changed, 36 insertions(+), 74 deletions(-) diff --git a/examples/simple_sr.py b/examples/simple_sr.py index 320ba42..1d3f48b 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -13,8 +13,8 @@ def compile_individuals(toolbox, individuals_str_batch): # Ground truth -x = np.array([x / 10.0 for x in range(-10, 10)]) -y = x**4 + x**3 + x**2 + x +x = np.array([x / 10.0 for x in range(-10, 10)]).reshape(-1, 1) +y = (x**4 + x**3 + x**2 + x).ravel() def check_trig_fn(ind): @@ -42,7 +42,7 @@ def get_features_batch( def eval_MSE_sol(individual, X, y): warnings.filterwarnings("ignore") - y_pred = individual(X) + y_pred = individual(X).ravel() MSE = np.mean(np.square(y_pred - y)) if np.isnan(MSE): MSE = 1e5 diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index d319785..e7b0448 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -12,7 +12,6 @@ import random from alpine.gp.util import ( mapper, - add_primitives_to_pset_from_dict, max_func, min_func, avg_func, @@ -20,7 +19,7 @@ fitness_value, ) from sklearn.base import BaseEstimator, RegressorMixin -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted, validate_data # reducing the number of threads launched by fitness evaluations @@ -174,32 +173,6 @@ def n_elitist(self): def get_params(self, deep=True): return self.__dict__ - # def __pset_config(self): - # pset = gp.PrimitiveSetTyped( - # "MAIN", - # [ - # float, - # ], - # float, - # ) - # pset.renameArguments(ARG0="x") - # primitives = { - # "imports": {"alpine.gp.numpy_primitives": ["numpy_primitives"]}, - # "used": [ - # {"name": "add", "dimension": None, "rank": None}, - # {"name": "sub", "dimension": None, "rank": None}, - # {"name": "mul", "dimension": None, "rank": None}, - # {"name": "div", "dimension": None, "rank": None}, - # {"name": "sin", "dimension": None, "rank": None}, - # {"name": "cos", "dimension": None, "rank": None}, - # {"name": "exp", "dimension": None, "rank": None}, - # {"name": "log", "dimension": None, "rank": None}, - # ], - # } - - # pset = add_primitives_to_pset_from_dict(pset, primitives) - # return pset - def __creator_toolbox_pset_config(self): """Initialize toolbox and individual creator based on config file.""" pset = self.pset_config @@ -430,7 +403,7 @@ def __register_map(self, toolbox): # @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" - X, y = self._validate_data(X, y, accept_sparse=False) + X, y = validate_data(self, X, y, accept_sparse=False) # config individual creator and toolbox toolbox, pset = self.__creator_toolbox_pset_config() @@ -617,7 +590,7 @@ def __run(self, toolbox): self.__history.update(self.__pop[0]) # Seeds the first island with individuals - if self.seed_ind is not None: + if self.seed_str is not None: print("Seeding population with individuals...", flush=True) self.__pop[0][: len(self.seed_ind)] = self.seed_ind diff --git a/tests/test_basic_sr.py b/tests/test_basic_sr.py index 5dbb9c8..a9747a0 100644 --- a/tests/test_basic_sr.py +++ b/tests/test_basic_sr.py @@ -13,7 +13,7 @@ x = jnp.array([x / 10.0 for x in range(-10, 10)]).reshape(-1, 1) -y = x**4 + x**3 + x**2 + x +y = (x**4 + x**3 + x**2 + x).ravel() def eval_MSE_sol(individual, X, y): @@ -23,7 +23,9 @@ def eval_MSE_sol(individual, X, y): config() # Evaluate the mean squared error between the expression # and the real function : x**4 + x**3 + x**2 + x - y_pred = individual(X) + # WARNING: since X is a column vector, you get a 2D array as an output, + # while y is a 1D array + y_pred = individual(X).ravel() MSE = None if y is not None: diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index 6a4b0e8..bf368b1 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -16,6 +16,7 @@ # choose precision and whether to use GPU or CPU # needed for context of the plots at the end of the evolution +os.environ["JAX_PLATFORMS"] = "cpu" config() @@ -40,6 +41,7 @@ def eval_MSE_sol( # need to call config again before using JAX in energy evaluations to make sure that # the current worker has initialized JAX + os.environ["JAX_PLATFORMS"] = "cpu" config() # objective: squared norm of the residual of the equation + penalty on Dirichlet @@ -56,44 +58,27 @@ def obj(x, y): MSE = 0.0 - us = [] + # set additional arguments of the objective function + # (apart from the vector of unknowns) + args = {"y": X} + prb.set_obj_args(args) - for i, curr_force in enumerate(X): - # set additional arguments of the objective function - # (apart from the vector of unknowns) - args = {"y": curr_force} - prb.set_obj_args(args) + print(X, y) + # minimize the objective + u = prb.solve(x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000) - # minimize the objective - u = prb.solve( - x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000 - ) + if prb.last_opt_result == 1 or prb.last_opt_result == 3 or prb.last_opt_result == 4: - if y is not None: - if ( - prb.last_opt_result == 1 - or prb.last_opt_result == 3 - or prb.last_opt_result == 4 - ): + MSE = np.mean(np.linalg.norm(u - y) ** 2) + else: + MSE = math.nan - current_err = np.linalg.norm(u - y[i, :]) ** 2 - else: - current_err = math.nan + if math.isnan(MSE): + MSE = 1e5 - if math.isnan(current_err): - MSE = 1e5 - break + return MSE, u - MSE += current_err - us.append(u) - - MSE *= 1 / num_nodes - - return MSE, us - - -@ray.remote def predict( individuals_str: list[str], toolbox, @@ -113,7 +98,6 @@ def predict( return u -@ray.remote def score( individuals_str: list[str], toolbox, @@ -134,7 +118,6 @@ def score( return MSE -@ray.remote def fitness( individuals_str: list[str], toolbox, @@ -162,7 +145,7 @@ def fitness( @pytest.mark.parametrize("yamlfile", cases) -def test_poisson1d(set_test_dir, yamlfile): +def test_poisson1d(yamlfile): filename = os.path.join(os.path.dirname(__file__), yamlfile) regressor_params, config_file_data = util.load_config_data(filename) @@ -182,8 +165,8 @@ def test_poisson1d(set_test_dir, yamlfile): f = C.laplacian(u) f.coeffs *= -1.0 - X_train = np.array([f.coeffs.flatten()], dtype=dctkit.float_dtype) - y_train = np.array([u.coeffs.flatten()], dtype=dctkit.float_dtype) + X_train = np.array(f.coeffs, dtype=dctkit.float_dtype) + y_train = np.array(u.coeffs.flatten(), dtype=dctkit.float_dtype) # initial guess for the unknown of the Poisson problem (cochain of nodals values) u_0_vec = np.zeros(num_nodes, dtype=dctkit.float_dtype) @@ -207,9 +190,9 @@ def test_poisson1d(set_test_dir, yamlfile): gpsr = gps.GPSymbolicRegressor( pset_config=pset, - fitness=fitness.remote, - error_metric=score.remote, - predict_func=predict.remote, + fitness=fitness, + error_metric=score, + predict_func=predict, print_log=True, common_data=common_params, seed_str=seed_str, @@ -233,3 +216,7 @@ def test_poisson1d(set_test_dir, yamlfile): ray.shutdown() assert np.allclose(u.coeffs.flatten(), np.ravel(u_best)) assert fit_score <= 1e-12 + + +if __name__ == "__main__": + test_poisson1d("poisson1d_1.yaml") From 7b4dcea5cc20eda8e3f35763931517ee1f5e26e6 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 31 Mar 2025 18:57:36 +0200 Subject: [PATCH 21/23] All tests running. --- src/alpine/gp/regressor.py | 29 ++++++++++++++++++++++------- tests/test_poisson1d.py | 33 ++++++++++++++++----------------- tests/test_regressor.py | 18 ++++++++++++++++-- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index e7b0448..bdf29ee 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -281,7 +281,7 @@ def __compute_valid_stats(self, pop, toolbox): return valid_fit, valid_err - def __stats(self, pop, gen, evals): + def __stats(self, pop, gen, evals, toolbox): """Compute and print statistics of a population.""" # LINE_UP = '\033[1A' @@ -292,7 +292,7 @@ def __stats(self, pop, gen, evals): # record the statistics in the logbook if self.validate: # compute satistics related to the validation set - valid_fit, valid_err = self.__compute_valid_stats(pop) + valid_fit, valid_err = self.__compute_valid_stats(pop, toolbox) record["valid"] = {"valid_fit": valid_fit, "valid_err": valid_err} self.__logbook.record(gen=gen, evals=evals, **record) @@ -403,7 +403,16 @@ def __register_map(self, toolbox): # @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, X_val=None, y_val=None): """Fits the training data using GP-based symbolic regression.""" - X, y = validate_data(self, X, y, accept_sparse=False) + X, y = validate_data( + self, + X, + y, + accept_sparse=False, + skip_check_array=True, + # ensure_2d=False, + # allow_nd=True, + # multi_output=True, + ) # config individual creator and toolbox toolbox, pset = self.__creator_toolbox_pset_config() @@ -453,14 +462,16 @@ def fit(self, X, y=None, X_val=None, y_val=None): if self.validate and self.error_metric is not None: self.__register_val_funcs(toolbox) self.__run(toolbox) - self._is_fitted = True + self.is_fitted_ = True return self def predict(self, X): check_is_fitted(self) toolbox, pset = self.__creator_toolbox_pset_config() self.__register_map(toolbox) - X = self._validate_data(X, accept_sparse=False, reset=False) + X = validate_data( + self, X, accept_sparse=False, reset=False, skip_check_array=True + ) test_data = {"X": X} datasets = {"test": test_data} self.__store_datasets(datasets) @@ -475,7 +486,9 @@ def score(self, X, y): check_is_fitted(self) toolbox, pset = self.__creator_toolbox_pset_config() self.__register_map(toolbox) - X, y = self._validate_data(X, y, accept_sparse=False, reset=False) + X, y = validate_data( + self, X, y, accept_sparse=False, reset=False, skip_check_array=True + ) test_data = {"X": X, "y": y} datasets = {"test": test_data} self.__store_datasets(datasets) @@ -628,7 +641,9 @@ def __run(self, toolbox): ) # compute and print population statistics (including all islands) - self.__stats(self.__flatten_list(self.__pop), self.__cgen, num_evals) + self.__stats( + self.__flatten_list(self.__pop), self.__cgen, num_evals, toolbox + ) if self.print_log: print("Best individuals of this generation:", flush=True) diff --git a/tests/test_poisson1d.py b/tests/test_poisson1d.py index bf368b1..9b4129e 100644 --- a/tests/test_poisson1d.py +++ b/tests/test_poisson1d.py @@ -63,20 +63,23 @@ def obj(x, y): args = {"y": X} prb.set_obj_args(args) - print(X, y) # minimize the objective u = prb.solve(x0=u_0.coeffs.flatten(), ftol_abs=1e-12, ftol_rel=1e-12, maxeval=1000) - if prb.last_opt_result == 1 or prb.last_opt_result == 3 or prb.last_opt_result == 4: + if y is not None: + if ( + prb.last_opt_result == 1 + or prb.last_opt_result == 3 + or prb.last_opt_result == 4 + ): + MSE = np.mean(np.linalg.norm(u - y) ** 2) + else: + MSE = math.nan - MSE = np.mean(np.linalg.norm(u - y) ** 2) - else: - MSE = math.nan + if math.isnan(MSE): + MSE = 1e5 - if math.isnan(MSE): - MSE = 1e5 - - return MSE, u + return MSE, [u] def predict( @@ -145,7 +148,7 @@ def fitness( @pytest.mark.parametrize("yamlfile", cases) -def test_poisson1d(yamlfile): +def test_poisson1d(set_test_dir, yamlfile): filename = os.path.join(os.path.dirname(__file__), yamlfile) regressor_params, config_file_data = util.load_config_data(filename) @@ -165,8 +168,8 @@ def test_poisson1d(yamlfile): f = C.laplacian(u) f.coeffs *= -1.0 - X_train = np.array(f.coeffs, dtype=dctkit.float_dtype) - y_train = np.array(u.coeffs.flatten(), dtype=dctkit.float_dtype) + X_train = np.array(f.coeffs.ravel(), dtype=dctkit.float_dtype) + y_train = np.array(u.coeffs.ravel(), dtype=dctkit.float_dtype) # initial guess for the unknown of the Poisson problem (cochain of nodals values) u_0_vec = np.zeros(num_nodes, dtype=dctkit.float_dtype) @@ -211,12 +214,8 @@ def test_poisson1d(yamlfile): fit_score = gpsr.score(X_train, y_train) - gpsr.__save_best_test_sols(X_train, "./") + gpsr.save_best_test_sols(X_train, "./") ray.shutdown() assert np.allclose(u.coeffs.flatten(), np.ravel(u_best)) assert fit_score <= 1e-12 - - -if __name__ == "__main__": - test_poisson1d("poisson1d_1.yaml") diff --git a/tests/test_regressor.py b/tests/test_regressor.py index e86363f..b41f7bc 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -54,10 +54,24 @@ def test_check_regressor(): batch_size=100, ) - print(gpsr.get_params()) check_estimator( gpsr, - expected_failed_checks={"check_regressors_train": "dummy model"}, + expected_failed_checks={ + "check_regressors_train": "dummy model", + "check_complex_data": "check_array=False", + "check_dtype_object": "check_array=False", + "check_estimators_empty_data_messages": "check_array=False", + "check_estimators_nan_inf": "check_array=False", + "check_estimator_sparse_tag": "check_array=False", + "check_estimator_sparse_container": "check_array=False", + "check_estimator_sparse_array": "check_array=False", + "check_estimator_sparse_matrix": "check_array=False", + "check_regressor_data_not_an_array": "check_array=False", + "check_supervised_y_2d": "check_array=False", + "check_supervised_y_no_nan": "check_array=False", + "check_fit1d": "check_array=False", + "check_fit2d_predict1d": "check_array=False", + }, ) # # Generate synthetic data From 5c500a5b4d9e79fdf9c7d81062416680630b2bd0 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Mon, 31 Mar 2025 20:56:20 +0200 Subject: [PATCH 22/23] Formatting. --- docs/conf.py | 15 +- setup.py | 1 + src/alpine/data.py | 7 +- src/alpine/gp/cochain_primitives.py | 491 +++++++++++++++++++--------- src/alpine/gp/jax_primitives.py | 29 +- src/alpine/gp/primitives.py | 92 +++--- 6 files changed, 412 insertions(+), 223 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8d8d180..9e2e502 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -176,10 +176,7 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = { - "sidebar_width": "300px", - "page_width": "1200px" -} +html_theme_options = {"sidebar_width": "300px", "page_width": "1200px"} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] @@ -264,7 +261,13 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ("index", "user_guide.tex", "alpine Documentation", "Alessandro Lucantonio", "manual") + ( + "index", + "user_guide.tex", + "alpine Documentation", + "Alessandro Lucantonio", + "manual", + ) ] # The name of an image file (relative to this directory) to place at the top of @@ -301,4 +304,4 @@ "pyscaffold": ("https://pyscaffold.org/en/stable", None), } -print(f"loading configurations for {project} {version} ...", file=sys.stderr) \ No newline at end of file +print(f"loading configurations for {project} {version} ...", file=sys.stderr) diff --git a/setup.py b/setup.py index 90cb5f2..9c039ca 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ PyScaffold helps you to put up the scaffold of your new Python project. Learn more under: https://pyscaffold.org/ """ + from setuptools import setup if __name__ == "__main__": diff --git a/src/alpine/data.py b/src/alpine/data.py index 875bd9d..297f6e2 100644 --- a/src/alpine/data.py +++ b/src/alpine/data.py @@ -2,9 +2,10 @@ from jax import Array -class Dataset(): - def __init__(self, name: str, X: Array | npt.NDArray, - y: Array | npt.NDArray | None = None) -> None: +class Dataset: + def __init__( + self, name: str, X: Array | npt.NDArray, y: Array | npt.NDArray | None = None + ) -> None: self.name = name self.X = X self.y = y diff --git a/src/alpine/gp/cochain_primitives.py b/src/alpine/gp/cochain_primitives.py index cec2404..d836388 100644 --- a/src/alpine/gp/cochain_primitives.py +++ b/src/alpine/gp/cochain_primitives.py @@ -5,175 +5,346 @@ from .primitives import switch_category, generate_primitive_variants # Define the modules and functions needed to eval inputs and outputs -modules_functions = { - 'dctkit.dec': ['cochain'] -} +modules_functions = {"dctkit.dec": ["cochain"]} def inv_scalar_mul(c, f): try: - return C.scalar_mul(c, 1/f) + return C.scalar_mul(c, 1 / f) except ZeroDivisionError: return C.scalar_mul(c, jax.numpy.nan) # define cochain primitives -add_coch = {'fun_info': {'name': 'AddC', 'fun': C.add}, - 'input': ["cochain.Cochain", "cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -sub_coch = {'fun_info': {'name': 'SubC', 'fun': C.sub}, - 'input': ["cochain.Cochain", "cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -coboundary = {'fun_info': {'name': 'cob', 'fun': C.coboundary}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, - 'dimension': partial(operator.add, 1), - "rank": lambda x: x}} -codifferential = {'fun_info': {'name': 'del', 'fun': C.codifferential}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': - partial(operator.add, -1), "rank": lambda x: x}} -tr_coch = {'fun_info': {'name': 'tr', 'fun': C.trace}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("T",)}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: ""}} -mul_FT = {'fun_info': {'name': 'MF', 'fun': C.scalar_mul}, - 'input': ["cochain.Cochain", "float"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -inv_mul_FT = {'fun_info': {'name': 'InvM', 'fun': inv_scalar_mul}, - 'input': ["cochain.Cochain", "float"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -mul_coch = {'fun_info': {'name': 'CMul', 'fun': C.cochain_mul}, - 'input': ["cochain.Cochain", "cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC",)}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -tran_coch = {'fun_info': {'name': 'tran', 'fun': C.transpose}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("T",)}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -sym_coch = {'fun_info': {'name': 'sym', 'fun': C.sym}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("T",)}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -star_1 = {'fun_info': {'name': 'St1', 'fun': C.star}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': partial(switch_category, ('P', 'D')), - 'dimension': partial(lambda x, y: y - x, y=1), - "rank": lambda x: x}} -star_2 = {'fun_info': {'name': 'St2', 'fun': C.star}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': partial(switch_category, ('P', 'D')), - 'dimension': partial(lambda x, y: y-x, y=2), - "rank": lambda x: x}} -inner_product = {'fun_info': {'name': 'Inn', 'fun': C.inner}, - 'input': ["cochain.Cochain", "cochain.Cochain"], - 'output': "float", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: "", 'dimension': lambda x: "", - "rank": lambda x: ""}} -sin_coch = {'fun_info': {'name': 'Sin', 'fun': C.sin}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -arcsin_coch = {'fun_info': {'name': 'ArcSin', 'fun': C.arcsin}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -cos_coch = {'fun_info': {'name': 'Cos', 'fun': C.cos}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -arccos_coch = {'fun_info': {'name': 'ArcCos', 'fun': C.arccos}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -exp_coch = {'fun_info': {'name': 'Exp', 'fun': C.exp}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -log_coch = {'fun_info': {'name': 'Log', 'fun': C.log}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -sqrt_coch = {'fun_info': {'name': 'Sqrt', 'fun': C.sqrt}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} -square_coch = {'fun_info': {'name': 'Square', 'fun': C.square}, - 'input': ["cochain.Cochain"], - 'output': "cochain.Cochain", - 'att_input': {'category': ('P', 'D'), 'dimension': ('0', '1', '2'), - "rank": ("SC", "V", "T")}, - 'map_rule': {'category': lambda x: x, 'dimension': lambda x: x, - "rank": lambda x: x}} +add_coch = { + "fun_info": {"name": "AddC", "fun": C.add}, + "input": ["cochain.Cochain", "cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +sub_coch = { + "fun_info": {"name": "SubC", "fun": C.sub}, + "input": ["cochain.Cochain", "cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +coboundary = { + "fun_info": {"name": "cob", "fun": C.coboundary}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": partial(operator.add, 1), + "rank": lambda x: x, + }, +} +codifferential = { + "fun_info": {"name": "del", "fun": C.codifferential}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": partial(operator.add, -1), + "rank": lambda x: x, + }, +} +tr_coch = { + "fun_info": {"name": "tr", "fun": C.trace}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": {"category": ("P", "D"), "dimension": ("0", "1", "2"), "rank": ("T",)}, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: "", + }, +} +mul_FT = { + "fun_info": {"name": "MF", "fun": C.scalar_mul}, + "input": ["cochain.Cochain", "float"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +inv_mul_FT = { + "fun_info": {"name": "InvM", "fun": inv_scalar_mul}, + "input": ["cochain.Cochain", "float"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +mul_coch = { + "fun_info": {"name": "CMul", "fun": C.cochain_mul}, + "input": ["cochain.Cochain", "cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC",), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +tran_coch = { + "fun_info": {"name": "tran", "fun": C.transpose}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": {"category": ("P", "D"), "dimension": ("0", "1", "2"), "rank": ("T",)}, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +sym_coch = { + "fun_info": {"name": "sym", "fun": C.sym}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": {"category": ("P", "D"), "dimension": ("0", "1", "2"), "rank": ("T",)}, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +star_1 = { + "fun_info": {"name": "St1", "fun": C.star}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": partial(switch_category, ("P", "D")), + "dimension": partial(lambda x, y: y - x, y=1), + "rank": lambda x: x, + }, +} +star_2 = { + "fun_info": {"name": "St2", "fun": C.star}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": partial(switch_category, ("P", "D")), + "dimension": partial(lambda x, y: y - x, y=2), + "rank": lambda x: x, + }, +} +inner_product = { + "fun_info": {"name": "Inn", "fun": C.inner}, + "input": ["cochain.Cochain", "cochain.Cochain"], + "output": "float", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: "", + "dimension": lambda x: "", + "rank": lambda x: "", + }, +} +sin_coch = { + "fun_info": {"name": "Sin", "fun": C.sin}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +arcsin_coch = { + "fun_info": {"name": "ArcSin", "fun": C.arcsin}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +cos_coch = { + "fun_info": {"name": "Cos", "fun": C.cos}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +arccos_coch = { + "fun_info": {"name": "ArcCos", "fun": C.arccos}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +exp_coch = { + "fun_info": {"name": "Exp", "fun": C.exp}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +log_coch = { + "fun_info": {"name": "Log", "fun": C.log}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +sqrt_coch = { + "fun_info": {"name": "Sqrt", "fun": C.sqrt}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} +square_coch = { + "fun_info": {"name": "Square", "fun": C.square}, + "input": ["cochain.Cochain"], + "output": "cochain.Cochain", + "att_input": { + "category": ("P", "D"), + "dimension": ("0", "1", "2"), + "rank": ("SC", "V", "T"), + }, + "map_rule": { + "category": lambda x: x, + "dimension": lambda x: x, + "rank": lambda x: x, + }, +} -coch_prim_list = [add_coch, sub_coch, coboundary, codifferential, tr_coch, mul_FT, - inv_mul_FT, mul_coch, tran_coch, sym_coch, star_1, star_2, - inner_product, sin_coch, arcsin_coch, cos_coch, arccos_coch, exp_coch, - log_coch, sqrt_coch, square_coch] +coch_prim_list = [ + add_coch, + sub_coch, + coboundary, + codifferential, + tr_coch, + mul_FT, + inv_mul_FT, + mul_coch, + tran_coch, + sym_coch, + star_1, + star_2, + inner_product, + sin_coch, + arcsin_coch, + cos_coch, + arccos_coch, + exp_coch, + log_coch, + sqrt_coch, + square_coch, +] -coch_primitives = list(map(partial(generate_primitive_variants, - imports=modules_functions), coch_prim_list)) +coch_primitives = list( + map(partial(generate_primitive_variants, imports=modules_functions), coch_prim_list) +) coch_primitives = {k: v for d in coch_primitives for k, v in d.items()} diff --git a/src/alpine/gp/jax_primitives.py b/src/alpine/gp/jax_primitives.py index aeef1ad..13f74d9 100644 --- a/src/alpine/gp/jax_primitives.py +++ b/src/alpine/gp/jax_primitives.py @@ -25,7 +25,7 @@ def protectedSqrt(x): def inv_float(x): - return protectedDiv(1., x) + return protectedDiv(1.0, x) def square_mod(x): @@ -34,16 +34,17 @@ def square_mod(x): jax_primitives = { # scalar operations (JAX backend) - 'AddF': PrimitiveParams(jnp.add, [float, float], float), - 'SubF': PrimitiveParams(jnp.subtract, [float, float], float), - 'MulF': PrimitiveParams(jnp.multiply, [float, float], float), - 'Div': PrimitiveParams(protectedDiv, [float, float], float), - 'SinF': PrimitiveParams(jnp.sin, [float], float), - 'ArcsinF': PrimitiveParams(jnp.arcsin, [float], float), - 'CosF': PrimitiveParams(jnp.cos, [float], float), - 'ArccosF': PrimitiveParams(jnp.arccos, [float], float), - 'ExpF': PrimitiveParams(jnp.exp, [float], float), - 'LogF': PrimitiveParams(protectedLog, [float], float), - 'SqrtF': PrimitiveParams(protectedSqrt, [float], float), - 'SquareF': PrimitiveParams(jnp.square, [float], float), - 'InvF': PrimitiveParams(inv_float, [float], float)} + "AddF": PrimitiveParams(jnp.add, [float, float], float), + "SubF": PrimitiveParams(jnp.subtract, [float, float], float), + "MulF": PrimitiveParams(jnp.multiply, [float, float], float), + "Div": PrimitiveParams(protectedDiv, [float, float], float), + "SinF": PrimitiveParams(jnp.sin, [float], float), + "ArcsinF": PrimitiveParams(jnp.arcsin, [float], float), + "CosF": PrimitiveParams(jnp.cos, [float], float), + "ArccosF": PrimitiveParams(jnp.arccos, [float], float), + "ExpF": PrimitiveParams(jnp.exp, [float], float), + "LogF": PrimitiveParams(protectedLog, [float], float), + "SqrtF": PrimitiveParams(protectedSqrt, [float], float), + "SquareF": PrimitiveParams(jnp.square, [float], float), + "InvF": PrimitiveParams(inv_float, [float], float), +} diff --git a/src/alpine/gp/primitives.py b/src/alpine/gp/primitives.py index 82ee80b..b101842 100644 --- a/src/alpine/gp/primitives.py +++ b/src/alpine/gp/primitives.py @@ -15,9 +15,10 @@ def __init__(self, op, in_types, out_type) -> None: self.out_type = out_type -def generate_primitive_variants(primitive: Dict[str, Dict[str, Callable] | List[str] - | str | Dict], - imports: Dict = None) -> Dict: +def generate_primitive_variants( + primitive: Dict[str, Dict[str, Callable] | List[str] | str | Dict], + imports: Dict = None, +) -> Dict: """Generate primitive variants given a typed primitive. Args: @@ -39,9 +40,9 @@ def generate_primitive_variants(primitive: Dict[str, Dict[str, Callable] | List[ a dict in which each key is the name of the primitive variant and each value is a PrimitiveParams object. """ - base_primitive = primitive['fun_info'] - in_attribute = primitive['att_input'] - map_rule = primitive['map_rule'] + base_primitive = primitive["fun_info"] + in_attribute = primitive["att_input"] + map_rule = primitive["map_rule"] primitive_dictionary = dict() # Dynamically import modules and functions needed to eval input/output types @@ -54,39 +55,39 @@ def generate_primitive_variants(primitive: Dict[str, Dict[str, Callable] | List[ def eval_with_globals(expression): return eval(expression, custom_globals) - for in_category in in_attribute['category']: - for in_dim in in_attribute['dimension']: - for in_rank in in_attribute['rank']: + for in_category in in_attribute["category"]: + for in_dim in in_attribute["dimension"]: + for in_rank in in_attribute["rank"]: # compute the primitive name taking into account # the right category, dim and rank in_rank = in_rank.replace("SC", "") - primitive_name = base_primitive['name'] + \ - in_category + in_dim + in_rank + primitive_name = base_primitive["name"] + in_category + in_dim + in_rank in_type_name = [] # compute the input type list - for i, input in enumerate(primitive['input']): + for i, input in enumerate(primitive["input"]): # float type must be handled separately if input == "float": in_type_name.append(input) elif len(in_rank) == 2: # in this case the correct rank must be taken - in_type_name.append(input + in_category + - in_dim + in_rank[i]) + in_type_name.append(input + in_category + in_dim + in_rank[i]) else: in_type_name.append(input + in_category + in_dim + in_rank) in_type = list(map(eval_with_globals, in_type_name)) - out_category = map_rule['category'](in_category) - out_dim = str(map_rule['dimension'](int(in_dim))) - out_rank = map_rule['rank'](in_rank) - out_type_name = primitive['output'] + out_category + out_dim + out_rank + out_category = map_rule["category"](in_category) + out_dim = str(map_rule["dimension"](int(in_dim))) + out_rank = map_rule["rank"](in_rank) + out_type_name = primitive["output"] + out_category + out_dim + out_rank out_type = eval_with_globals(out_type_name) primitive_dictionary[primitive_name] = PrimitiveParams( - base_primitive['fun'], in_type, out_type) + base_primitive["fun"], in_type, out_type + ) return primitive_dictionary -def add_primitives_to_pset(pset: PrimitiveSetTyped, primitives_to_add: list, - primitives_collection: dict): +def add_primitives_to_pset( + pset: PrimitiveSetTyped, primitives_to_add: list, primitives_collection: dict +): """Add a given list of primitives to a given PrimitiveSet. Args: @@ -100,36 +101,47 @@ def add_primitives_to_pset(pset: PrimitiveSetTyped, primitives_to_add: list, """ for primitive in primitives_to_add: # pre-process scalar primitives - if primitive['dimension'] is None: - primitive['dimension'] = [] - if primitive['rank'] is None: - primitive['rank'] = [] + if primitive["dimension"] is None: + primitive["dimension"] = [] + if primitive["rank"] is None: + primitive["rank"] = [] # save dimensions and ranks not admitted for the problem - non_feasible_dimensions = list(set(('0', '1', '2')) - - set(primitive['dimension'])) - non_feasible_ranks = list( - set(("SC", "V", "T")) - set(primitive["rank"])) + non_feasible_dimensions = list( + set(("0", "1", "2")) - set(primitive["dimension"]) + ) + non_feasible_ranks = list(set(("SC", "V", "T")) - set(primitive["rank"])) # iterate over all the primitives, pre-computed and stored in the dictionary # primitives for typed_primitive in primitives_collection.keys(): - if primitive['name'] in typed_primitive: + if primitive["name"] in typed_primitive: # remove the case in which the name of the primitive is a subname # of type_primitive (e.g. if primitive['name'] = sin and typed_primitive # = arcsin, we don't want to add the primitive) - exact_name_check = len( - typed_primitive.replace(primitive['name'], "")) <= 2 + exact_name_check = ( + len(typed_primitive.replace(primitive["name"], "")) <= 2 + ) # check if the dimension/rank of a typed primitive # is admissible, i.e. if it does not coincide with a non-admissible # dimension/rank # FIXME: change this! - check_wrong_dim_primal = sum([typed_primitive.count("P" + obj) - for obj in non_feasible_dimensions]) - check_wrong_dim_dual = sum([typed_primitive.count("D" + obj) - for obj in non_feasible_dimensions]) - check_rank = sum([typed_primitive.count("P" + obj) - for obj in non_feasible_ranks]) - check_wrong_dim_rank = check_wrong_dim_primal + check_wrong_dim_dual +\ - check_rank + check_wrong_dim_primal = sum( + [ + typed_primitive.count("P" + obj) + for obj in non_feasible_dimensions + ] + ) + check_wrong_dim_dual = sum( + [ + typed_primitive.count("D" + obj) + for obj in non_feasible_dimensions + ] + ) + check_rank = sum( + [typed_primitive.count("P" + obj) for obj in non_feasible_ranks] + ) + check_wrong_dim_rank = ( + check_wrong_dim_primal + check_wrong_dim_dual + check_rank + ) if check_wrong_dim_rank == 0 and exact_name_check: op = primitives_collection[typed_primitive].op in_types = primitives_collection[typed_primitive].in_types From a85557775833b8d5e703f253ce3c7d9dba151259 Mon Sep 17 00:00:00 2001 From: Alessandro Lucantonio Date: Tue, 1 Apr 2025 16:29:19 +0200 Subject: [PATCH 23/23] Fixing examples and benchmarks. --- bench/bench.py | 57 +++++++-------------------- bench/bench.sh | 76 ++++++++++++++++++------------------ examples/simple_sr.py | 19 +++------ examples/simple_sr_noyaml.py | 25 +++++------- src/alpine/gp/regressor.py | 9 ++++- src/alpine/gp/util.py | 15 +++++++ 6 files changed, 91 insertions(+), 110 deletions(-) diff --git a/bench/bench.py b/bench/bench.py index 8fb27a0..09b6f4a 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -50,22 +50,8 @@ def compute_MSE(individual, X, y, consts=[]): # TODO: this could become a library function -def compile_individual_with_consts(tree, toolbox, special_term_name="a"): - const_idx = 0 - tree_clone = toolbox.clone(tree) - for i, node in enumerate(tree_clone): - if isinstance(node, gp.Terminal) and node.name[0:3] != "ARG": - if node.name == special_term_name: - new_node_name = special_term_name + "[" + str(const_idx) + "]" - tree_clone[i] = gp.Terminal(new_node_name, True, float) - const_idx += 1 - - individual = toolbox.compile(expr=tree_clone, extra_args=[special_term_name]) - return individual, const_idx - - def eval_MSE_and_tune_constants(tree, toolbox, X, y): - individual, num_consts = compile_individual_with_consts(tree, toolbox) + individual, num_consts = util.compile_individual_with_consts(tree, toolbox) if num_consts > 0: @@ -130,31 +116,28 @@ def get_features_batch( return individ_length, nested_trigs, num_trigs -@ray.remote(num_cpus=num_cpus) def predict(individuals_str_batch, toolbox, X, penalty, fitness_scale): predictions = [None] * len(individuals_str_batch) for i, tree in enumerate(individuals_str_batch): - callable, _ = compile_individual_with_consts(tree, toolbox) + callable, _ = util.compile_individual_with_consts(tree, toolbox) predictions[i] = eval_model(callable, X, consts=tree.consts) return predictions -@ray.remote(num_cpus=num_cpus) def compute_MSEs(individuals_str_batch, toolbox, X, y, penalty, fitness_scale): total_errs = [None] * len(individuals_str_batch) for i, tree in enumerate(individuals_str_batch): - callable, _ = compile_individual_with_consts(tree, toolbox) + callable, _ = util.compile_individual_with_consts(tree, toolbox) total_errs[i] = compute_MSE(callable, X, y, consts=tree.consts) return total_errs -@ray.remote(num_cpus=num_cpus) def compute_attributes(individuals_str_batch, toolbox, X, y, penalty, fitness_scale): attributes = [None] * len(individuals_str_batch) @@ -238,23 +221,21 @@ def eval(problem, cfgfile, seed=42): gpsr = gps.GPSymbolicRegressor( pset_config=pset, - fitness=compute_attributes.remote, - predict_func=predict.remote, - error_metric=compute_MSEs.remote, + fitness=compute_attributes, + predict_func=predict, + error_metric=compute_MSEs, common_data=common_params, callback_func=callback_func, - print_log=False, + print_log=True, num_best_inds_str=1, save_best_individual=False, output_path="./", seed_str=None, batch_size=batch_size, + num_cpus=num_cpus, **regressor_params, ) - # train_data = Dataset("dataset", X_train_scaled, y_train_scaled) - # test_data = Dataset("dataset", X_test_scaled, y_test) - if num_variables > 1: X_train = [X_train_scaled[:, i] for i in range(num_variables)] X_test = [X_test_scaled[:, i] for i in range(num_variables)] @@ -266,23 +247,17 @@ def eval(problem, cfgfile, seed=42): gpsr.fit(X_train, y_train_scaled) toc = time.time() - if hasattr(gpsr.__best, "consts"): - print("Best parameters = ", gpsr.__best.consts) + best = gpsr.get_best_individual() + if hasattr(best, "consts"): + print("Best parameters = ", best.consts) print("Elapsed time = ", toc - tic) individuals_per_sec = ( - (gpsr.__cgen + 1) * gpsr.NINDIVIDUALS * gpsr.num_islands / (toc - tic) + (gpsr.last_gen + 1) * gpsr.NINDIVIDUALS * gpsr.num_islands / (toc - tic) ) print("Individuals per sec = ", individuals_per_sec) u_best = gpsr.predict(X_test) - # print(u_best) - # print(y_test) - - # plt.figure() - # plt.plot(u_best) - # plt.plot(y_test, "+") - # plt.show() # de-scale outputs before computing errors if scaleXy: @@ -317,7 +292,6 @@ def eval(problem, cfgfile, seed=42): if __name__ == "__main__": import argparse import pathlib - import ray # problems = [ # "Nguyen-1", @@ -337,18 +311,17 @@ def eval(problem, cfgfile, seed=42): parser = argparse.ArgumentParser() - parser.add_argument("problem", help="Name of the PMLB or Nguyen dataset.") parser.add_argument( - "-c", + "cfgfile", type=pathlib.Path, - metavar="cfgfile", help="Path of the YAML config file for the problem.", ) + parser.add_argument("problem", help="Name of the PMLB or Nguyen dataset.") args = parser.parse_args() problem = args.problem - cfgfile = args.c + cfgfile = args.cfgfile # problem = "1089_USCrime" diff --git a/bench/bench.sh b/bench/bench.sh index f935500..4dd331f 100755 --- a/bench/bench.sh +++ b/bench/bench.sh @@ -1,38 +1,38 @@ -python bench.py -c PMLB_base.yaml 1089_USCrime -python bench.py -c PMLB_base.yaml 1027_ESL -python bench.py -c PMLB_base.yaml 1028_SWD -python bench.py -c PMLB_base.yaml 1029_LEV -python bench.py -c PMLB_base.yaml 1030_ERA -python bench.py -c PMLB_base.yaml 1096_FacultySalaries -python bench.py -c PMLB_base.yaml 192_vineyard -python bench.py -c PMLB_base.yaml 197_cpu_act -python bench.py -c PMLB_base.yaml 210_cloud -python bench.py -c PMLB_base.yaml 225_puma8NH -python bench.py -c PMLB_base.yaml 227_cpu_small -python bench.py -c PMLB_base.yaml 228_elusage -python bench.py -c PMLB_base.yaml 230_machine_cpu -python bench.py -c PMLB_base.yaml 523_analcatdata_neavote -python bench.py -c PMLB_base.yaml 547_no2 -python bench.py -c PMLB_base.yaml 663_rabe_266 -python bench.py -c PMLB_base.yaml 666_rmftsa_ladata -python bench.py -c PMLB_base.yaml 678_visualizing_environmental -python bench.py -c PMLB_base.yaml 687_sleuth_ex1605 -python bench.py -c PMLB_base.yaml 695_chatfield_4 -python bench.py -c PMLB_base.yaml 706_sleuth_case1202 -python bench.py -c PMLB_base.yaml 712_chscase_geyser1 -python bench.py -c PMLB_base.yaml 229_pwLinear -python bench.py -c PMLB_base.yaml 485_analcatdata_vehicle -python bench.py -c PMLB_base.yaml 522_pm10 -python bench.py -c PMLB_base.yaml 527_analcatdata_election2000 -python bench.py -c PMLB_base.yaml 542_pollution -python bench.py -c PMLB_base.yaml 556_analcatdata_apnea2 -python bench.py -c PMLB_base.yaml 557_analcatdata_apnea1 -python bench.py -c PMLB_base.yaml 560_bodyfat -python bench.py -c PMLB_base.yaml 561_cpu -python bench.py -c PMLB_base.yaml 659_sleuth_ex1714 -python bench.py -c PMLB_base.yaml 665_sleuth_case2002 -python bench.py -c PMLB_base.yaml 690_visualizing_galaxy -python bench.py -c PMLB_base.yaml 519_vinnie -python bench.py -c PMLB_base.yaml 529_pollen -python bench.py -c PMLB_base.yaml 503_wind -python bench.py -c PMLB_base.yaml 505_tecator \ No newline at end of file +python bench.py PMLB_base.yaml 1089_USCrime +python bench.py PMLB_base.yaml 1027_ESL +python bench.py PMLB_base.yaml 1028_SWD +python bench.py PMLB_base.yaml 1029_LEV +python bench.py PMLB_base.yaml 1030_ERA +python bench.py PMLB_base.yaml 1096_FacultySalaries +python bench.py PMLB_base.yaml 192_vineyard +python bench.py PMLB_base.yaml 197_cpu_act +python bench.py PMLB_base.yaml 210_cloud +python bench.py PMLB_base.yaml 225_puma8NH +python bench.py PMLB_base.yaml 227_cpu_small +python bench.py PMLB_base.yaml 228_elusage +python bench.py PMLB_base.yaml 230_machine_cpu +python bench.py PMLB_base.yaml 523_analcatdata_neavote +python bench.py PMLB_base.yaml 547_no2 +python bench.py PMLB_base.yaml 663_rabe_266 +python bench.py PMLB_base.yaml 666_rmftsa_ladata +python bench.py PMLB_base.yaml 678_visualizing_environmental +python bench.py PMLB_base.yaml 687_sleuth_ex1605 +python bench.py PMLB_base.yaml 695_chatfield_4 +python bench.py PMLB_base.yaml 706_sleuth_case1202 +python bench.py PMLB_base.yaml 712_chscase_geyser1 +python bench.py PMLB_base.yaml 229_pwLinear +python bench.py PMLB_base.yaml 485_analcatdata_vehicle +python bench.py PMLB_base.yaml 522_pm10 +python bench.py PMLB_base.yaml 527_analcatdata_election2000 +python bench.py PMLB_base.yaml 542_pollution +python bench.py PMLB_base.yaml 556_analcatdata_apnea2 +python bench.py PMLB_base.yaml 557_analcatdata_apnea1 +python bench.py PMLB_base.yaml 560_bodyfat +python bench.py PMLB_base.yaml 561_cpu +python bench.py PMLB_base.yaml 659_sleuth_ex1714 +python bench.py PMLB_base.yaml 665_sleuth_case2002 +python bench.py PMLB_base.yaml 690_visualizing_galaxy +python bench.py PMLB_base.yaml 519_vinnie +python bench.py PMLB_base.yaml 529_pollen +python bench.py PMLB_base.yaml 503_wind +python bench.py PMLB_base.yaml 505_tecator \ No newline at end of file diff --git a/examples/simple_sr.py b/examples/simple_sr.py index 1d3f48b..bc8a686 100644 --- a/examples/simple_sr.py +++ b/examples/simple_sr.py @@ -8,10 +8,6 @@ from alpine.gp import util -def compile_individuals(toolbox, individuals_str_batch): - return [toolbox.compile(expr=ind) for ind in individuals_str_batch] - - # Ground truth x = np.array([x / 10.0 for x in range(-10, 10)]).reshape(-1, 1) y = (x**4 + x**3 + x**2 + x).ravel() @@ -49,10 +45,9 @@ def eval_MSE_sol(individual, X, y): return MSE, y_pred -@ray.remote def predict(individuals_str, toolbox, X, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) @@ -62,10 +57,9 @@ def predict(individuals_str, toolbox, X, penalty): return u -@ray.remote def score(individuals_str, toolbox, X, y, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) @@ -75,9 +69,8 @@ def score(individuals_str, toolbox, X, y, penalty): return MSE -@ray.remote def fitness(individuals_str, toolbox, X, y, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -121,9 +114,9 @@ def main(): gpsr = GPSymbolicRegressor( pset_config=pset, - fitness=fitness.remote, - error_metric=score.remote, - predict_func=predict.remote, + fitness=fitness, + error_metric=score, + predict_func=predict, common_data=common_data, print_log=True, batch_size=100, diff --git a/examples/simple_sr_noyaml.py b/examples/simple_sr_noyaml.py index cc0462b..7e53ad6 100644 --- a/examples/simple_sr_noyaml.py +++ b/examples/simple_sr_noyaml.py @@ -7,13 +7,9 @@ from alpine.gp import util -def compile_individuals(toolbox, individuals_str_batch): - return [toolbox.compile(expr=ind) for ind in individuals_str_batch] - - # Ground truth -x = np.array([x / 10.0 for x in range(-10, 10)]) -y = x**4 + x**3 + x**2 + x +x = np.array([x / 10.0 for x in range(-10, 10)]).reshape(-1, 1) +y = (x**4 + x**3 + x**2 + x).ravel() def check_trig_fn(ind): @@ -41,17 +37,16 @@ def get_features_batch( def eval_MSE_sol(individual, X, y): warnings.filterwarnings("ignore") - y_pred = individual(X) + y_pred = individual(X).ravel() MSE = np.mean(np.square(y_pred - y)) if np.isnan(MSE): MSE = 1e5 return MSE, y_pred -@ray.remote def predict(individuals_str, toolbox, X, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) u = [None] * len(individuals_str) @@ -61,10 +56,9 @@ def predict(individuals_str, toolbox, X, penalty): return u -@ray.remote def score(individuals_str, toolbox, X, y, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) MSE = [None] * len(individuals_str) @@ -74,9 +68,8 @@ def score(individuals_str, toolbox, X, y, penalty): return MSE -@ray.remote def fitness(individuals_str, toolbox, X, y, penalty): - callables = compile_individuals(toolbox, individuals_str) + callables = util.compile_individuals(toolbox, individuals_str) individ_length, nested_trigs, num_trigs = get_features_batch(individuals_str) @@ -128,9 +121,9 @@ def main(): gpsr = GPSymbolicRegressor( pset_config=pset, - fitness=fitness.remote, - error_metric=score.remote, - predict_func=predict.remote, + fitness=fitness, + error_metric=score, + predict_func=predict, common_data=common_data, NINDIVIDUALS=100, num_islands=10, diff --git a/src/alpine/gp/regressor.py b/src/alpine/gp/regressor.py index bdf29ee..506d1c3 100644 --- a/src/alpine/gp/regressor.py +++ b/src/alpine/gp/regressor.py @@ -112,6 +112,7 @@ def __init__( save_train_fit_history: bool = False, output_path: str | None = None, batch_size=1, + num_cpus=1, ): super().__init__() self.pset_config = pset_config @@ -165,6 +166,7 @@ def __init__( self.frac_elitist = frac_elitist self.seed_str = seed_str + self.num_cpus = num_cpus @property def n_elitist(self): @@ -353,7 +355,7 @@ def __plot_genealogy(self, best): # networkx.nx_agraph.write_dot(graph, "genealogy.dot") def __get_remote(self, f): - return (ray.remote(f)).remote + return (ray.remote(f)).options(num_cpus=self.num_cpus).remote def __register_fitness_func(self, toolbox): store = self.__data_store @@ -686,6 +688,8 @@ def __run(self, toolbox): self.__plot_initialized = False print(" -= END OF EVOLUTION =- ", flush=True) + self.last_gen = self.__cgen + print(f"The best individual is {self.__best}", flush=True) print(f"The best fitness on the training set is {self.__train_fit_history[-1]}") @@ -733,6 +737,9 @@ def __save_train_fit_history(self, output_path: str): if self.validate: np.save(join(output_path, "val_fit_history.npy"), self.val_fit_history) + def get_best_individual(self): + return self.__best + def save_best_test_sols(self, X_test, output_path: str): """Compute and save the predictions corresponding to the best individual at the end of the evolution, evaluated over the test dataset. diff --git a/src/alpine/gp/util.py b/src/alpine/gp/util.py index 4d747e2..231dc40 100644 --- a/src/alpine/gp/util.py +++ b/src/alpine/gp/util.py @@ -4,6 +4,7 @@ from itertools import chain import ray import numpy as np +from deap import gp def add_primitives_to_pset_from_dict(pset, primitives_dict): @@ -127,6 +128,20 @@ def compile_individuals(toolbox, individuals_str_batch): return [toolbox.compile(expr=ind) for ind in individuals_str_batch] +def compile_individual_with_consts(tree, toolbox, special_term_name="a"): + const_idx = 0 + tree_clone = toolbox.clone(tree) + for i, node in enumerate(tree_clone): + if isinstance(node, gp.Terminal) and node.name[0:3] != "ARG": + if node.name == special_term_name: + new_node_name = special_term_name + "[" + str(const_idx) + "]" + tree_clone[i] = gp.Terminal(new_node_name, True, float) + const_idx += 1 + + individual = toolbox.compile(expr=tree_clone, extra_args=[special_term_name]) + return individual, const_idx + + def fitness_value(ind): return ind.fitness.values