@@ -2,11 +2,9 @@
 from deap import gp
 
 from alpine.gp import regressor as gps
-from alpine.data import Dataset
 from alpine.gp import util
 import numpy as np
 import ray
-import yaml
 
 import time
 
@@ -33,15 +31,15 @@ def check_nested_trig_fn(ind):
     return util.detect_nested_trigonometric_functions(str(ind))
 
 
-def eval_model(individual, D, consts=[]):
+def eval_model(individual, X, consts=[]):
     warnings.filterwarnings("ignore")
-    y_pred = individual(*D.X, consts)
+    y_pred = individual(*X, consts)
     return y_pred
 
 
-def compute_MSE(individual, D, consts=[]):
-    y_pred = eval_model(individual, D, consts)
-    MSE = np.mean((D.y - y_pred) ** 2)
+def compute_MSE(individual, X, y, consts=[]):
+    y_pred = eval_model(individual, X, consts)
+    MSE = np.mean((y - y_pred) ** 2)
 
     if np.isnan(MSE) or np.isinf(MSE):
         MSE = 1e8
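
Not part of the diff: a minimal, hypothetical sketch of the calling convention this hunk moves to, where X is a list of 1-D feature arrays and y is a flat target array (the toy individual below stands in for a compiled DEAP tree):

import numpy as np

# hypothetical compiled individual: f(x0, x1, consts) = consts[0] * x0 + x1
def individual(x0, x1, consts=[]):
    return consts[0] * x0 + x1

X = [np.array([0.0, 1.0, 2.0]), np.array([1.0, 1.0, 1.0])]  # one array per input variable
y = np.array([1.0, 3.0, 5.0])

y_pred = individual(*X, [2.0])    # mirrors eval_model(individual, X, consts)
mse = np.mean((y - y_pred) ** 2)  # mirrors compute_MSE(individual, X, y, consts)
print(mse)  # 0.0 for these values
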
@@ -66,7 +64,7 @@ def compile_individual_with_consts(tree, toolbox, special_term_name="a"):
     return individual, const_idx
 
 
-def eval_MSE_and_tune_constants(tree, toolbox, D):
+def eval_MSE_and_tune_constants(tree, toolbox, X, y):
     individual, num_consts = compile_individual_with_consts(tree, toolbox)
 
     if num_consts > 0:
@@ -75,8 +73,8 @@ def eval_MSE_and_tune_constants(tree, toolbox, D):
         # outside?
         def eval_MSE(consts):
             warnings.filterwarnings("ignore")
-            y_pred = individual(*D.X, consts)
-            total_err = np.mean((D.y - y_pred) ** 2)
+            y_pred = individual(*X, consts)
+            total_err = np.mean((y - y_pred) ** 2)
 
             return total_err
 
@@ -113,7 +111,7 @@ def get_bounds(self):
         if np.isinf(MSE) or np.isnan(MSE):
             MSE = 1e8
     else:
-        MSE = compute_MSE(individual, D)
+        MSE = compute_MSE(individual, X, y)
         consts = []
     return MSE, consts
 
@@ -133,31 +131,31 @@ def get_features_batch(
 
 
 @ray.remote(num_cpus=num_cpus)
-def predict(individuals_str_batch, toolbox, dataset, penalty, fitness_scale):
+def predict(individuals_str_batch, toolbox, X, penalty, fitness_scale):
 
     predictions = [None] * len(individuals_str_batch)
 
     for i, tree in enumerate(individuals_str_batch):
         callable, _ = compile_individual_with_consts(tree, toolbox)
-        predictions[i] = eval_model(callable, dataset, consts=tree.consts)
+        predictions[i] = eval_model(callable, X, consts=tree.consts)
 
     return predictions
 
 
 @ray.remote(num_cpus=num_cpus)
-def compute_MSEs(individuals_str_batch, toolbox, dataset, penalty, fitness_scale):
+def compute_MSEs(individuals_str_batch, toolbox, X, y, penalty, fitness_scale):
 
     total_errs = [None] * len(individuals_str_batch)
 
     for i, tree in enumerate(individuals_str_batch):
         callable, _ = compile_individual_with_consts(tree, toolbox)
-        total_errs[i] = compute_MSE(callable, dataset, consts=tree.consts)
+        total_errs[i] = compute_MSE(callable, X, y, consts=tree.consts)
 
     return total_errs
 
 
 @ray.remote(num_cpus=num_cpus)
-def compute_attributes(individuals_str_batch, toolbox, dataset, penalty, fitness_scale):
+def compute_attributes(individuals_str_batch, toolbox, X, y, penalty, fitness_scale):
 
     attributes = [None] * len(individuals_str_batch)
 
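
Not part of the diff: a self-contained sketch of the Ray batch-evaluation pattern these remote functions follow after the signature change, with toy callables standing in for compiled individuals and made-up data (the real batching and penalty handling live in the regressor):

import ray
import numpy as np

ray.init(ignore_reinit_error=True)

@ray.remote(num_cpus=1)
def mse_batch(callables, X, y):
    # toy stand-in for compute_MSEs: evaluate each callable on the shared X, y
    return [float(np.mean((y - f(*X)) ** 2)) for f in callables]

X = [np.linspace(0.0, 1.0, 5)]  # one 1-D array per variable
y = 2.0 * X[0]
batches = [[lambda x: 2.0 * x], [lambda x: x]]

print(ray.get([mse_batch.remote(b, X, y) for b in batches]))  # [[0.0], [0.375]]
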
@@ -170,7 +168,7 @@ def compute_attributes(individuals_str_batch, toolbox, dataset, penalty, fitness
             consts = None
             fitness = (1e8,)
         else:
-            MSE, consts = eval_MSE_and_tune_constants(tree, toolbox, dataset)
+            MSE, consts = eval_MSE_and_tune_constants(tree, toolbox, X, y)
             fitness = (
                 fitness_scale
                 * (
@@ -192,8 +190,7 @@ def assign_attributes(individuals, attributes):
 
 
 def eval(problem, cfgfile, seed=42):
 
-    with open(cfgfile) as config_file:
-        config_file_data = yaml.safe_load(config_file)
+    regressor_params, config_file_data = util.load_config_data(cfgfile)
 
     scaleXy = config_file_data["gp"]["scaleXy"]
 
@@ -219,6 +216,10 @@ def eval(problem, cfgfile, seed=42):
     else:
         pset = gp.PrimitiveSetTyped("Main", [float] * num_variables, float)
 
+    pset = util.add_primitives_to_pset_from_dict(
+        pset, config_file_data["gp"]["primitives"]
+    )
+
     batch_size = config_file_data["gp"]["batch_size"]
     if config_file_data["gp"]["use_constants"]:
         pset.addTerminal(object, float, "a")
@@ -244,25 +245,25 @@ def eval(problem, cfgfile, seed=42):
         callback_func=callback_func,
         print_log=False,
         num_best_inds_str=1,
-        config_file_data=config_file_data,
         save_best_individual=False,
         output_path="./",
         seed=None,
         batch_size=batch_size,
+        **regressor_params,
     )
 
-    train_data = Dataset("dataset", X_train_scaled, y_train_scaled)
-    test_data = Dataset("dataset", X_test_scaled, y_test)
+    # train_data = Dataset("dataset", X_train_scaled, y_train_scaled)
+    # test_data = Dataset("dataset", X_test_scaled, y_test)
 
     if num_variables > 1:
-        train_data.X = [train_data.X[:, i] for i in range(num_variables)]
-        test_data.X = [test_data.X[:, i] for i in range(num_variables)]
+        X_train = [X_train_scaled[:, i] for i in range(num_variables)]
+        X_test = [X_test_scaled[:, i] for i in range(num_variables)]
     else:
-        train_data.X = [train_data.X]
-        test_data.X = [test_data.X]
+        X_train = [X_train_scaled]
+        X_test = [X_test_scaled]
 
     tic = time.time()
-    gpsr.fit(train_data)
+    gpsr.fit(X_train, y_train_scaled)
     toc = time.time()
 
     if hasattr(gpsr.best, "consts"):
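
Not part of the diff: a small runnable sketch of the column-splitting convention adopted above, in which a 2-D design matrix becomes a list of 1-D arrays so a compiled tree can be called as individual(*X, consts) (shapes are illustrative):

import numpy as np

X_train_scaled = np.random.default_rng(0).normal(size=(100, 3))  # 100 samples, 3 variables
num_variables = X_train_scaled.shape[1]

if num_variables > 1:
    X_train = [X_train_scaled[:, i] for i in range(num_variables)]
else:
    X_train = [X_train_scaled]

print(len(X_train), X_train[0].shape)  # 3 (100,)
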
@@ -274,7 +275,7 @@ def eval(problem, cfgfile, seed=42):
     )
     print("Individuals per sec = ", individuals_per_sec)
 
-    u_best = gpsr.predict(test_data)
+    u_best = gpsr.predict(X_test)
     # print(u_best)
     # print(y_test)
 
@@ -292,7 +293,7 @@ def eval(problem, cfgfile, seed=42):
     print("MSE on the test set = ", MSE)
     print("R^2 on the test set = ", r2_test)
 
-    pred_train = gpsr.predict(train_data)
+    pred_train = gpsr.predict(X_train)
 
     if scaleXy:
         pred_train = scaler_y.inverse_transform(pred_train.reshape(-1, 1)).flatten()