Commit 0ee3699

Merge pull request #357 from automl/development
Development
2 parents f4b72be + 03879f4 commit 0ee3699

16 files changed: +116, -148 lines

.travis.yml (-3)

@@ -55,14 +55,11 @@ before_install:
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
   - if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi
-  - conda update --yes conda
   - conda create -n testenv --yes python=$PYTHON_VERSION pip wheel nose
   - source activate testenv
   - conda install --yes gcc swig
   - echo "Using GCC at "`which gcc`
   - export CC=`which gcc`
-  # Fixes version `GLIBCXX_3.4.21' not found (on Ubuntu 16.04)
-  - conda install --yes libgcc
 
 install:
   # Install general requirements the way setup.py suggests

autosklearn/__init__.py (+3, -2)

@@ -6,10 +6,11 @@
 __MANDATORY_PACKAGES__ = '''
 numpy>=1.9
 scikit-learn>=0.18.1,<0.19
-smac==0.5.0
 lockfile>=0.10
+smac>=0.6.0,<0.7
+pyrfr>=0.6.1,<0.7
 ConfigSpace>=0.3.3,<0.4
-pyrfr>=0.4.0,<0.5
+pyrfr>=0.6.0,<0.7
 '''
 
 dependencies.verify_packages(__MANDATORY_PACKAGES__)
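
For context, a minimal sketch of what a verify_packages-style check over such a requirement string can look like. This is an illustration built on pkg_resources, not the actual `dependencies` module imported above:

# Illustration only -- not auto-sklearn's real dependency checker.
import pkg_resources

def verify_packages(requirements):
    for line in requirements.splitlines():
        line = line.strip()
        if not line:
            continue
        requirement = pkg_resources.Requirement.parse(line)
        try:
            installed = pkg_resources.get_distribution(requirement.project_name)
        except pkg_resources.DistributionNotFound:
            raise ImportError('Mandatory package %s not found.' % line)
        if installed.version not in requirement:
            raise ImportError(
                'Found %s==%s, which does not satisfy %s.'
                % (requirement.project_name, installed.version, line)
            )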

autosklearn/evaluation/__init__.py (+8, -2)

@@ -72,8 +72,14 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
 
         eval_function = functools.partial(fit_predict_try_except_decorator,
                                           ta=eval_function)
-        super().__init__(ta=eval_function, stats=stats, runhistory=runhistory,
-                         run_obj=run_obj, par_factor=par_factor)
+        super().__init__(
+            ta=eval_function,
+            stats=stats,
+            runhistory=runhistory,
+            run_obj=run_obj,
+            par_factor=par_factor,
+            cost_for_crash=WORST_POSSIBLE_RESULT,
+        )
 
         self.backend = backend
         self.autosklearn_seed = autosklearn_seed
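
The new cost_for_crash argument tells the SMAC 0.6 target-algorithm runner which cost to record for a crashed evaluation instead of aborting the optimization loop. A rough sketch of the idea, with all names here chosen for illustration rather than taken from SMAC's internals:

# Illustrative pseudologic, not SMAC's real runner: a crashed run is booked
# with the worst possible cost so the optimizer avoids that configuration
# rather than failing outright.
def run_target_algorithm(ta, config, cost_for_crash):
    try:
        return ta(config)  # returns a loss on success
    except Exception:
        return cost_for_crash  # e.g. WORST_POSSIBLE_RESULT above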

autosklearn/smbo.py (+13, -89)

@@ -450,42 +450,6 @@ def run_smbo(self):
                 (1, -1))
             self.logger.info(list(meta_features_dict.keys()))
 
-            # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
-            # meta_runs_index = 0
-            # try:
-            #     meta_durations = meta_base.get_all_runs('runtime')
-            #     read_runtime_data = True
-            # except KeyError:
-            #     read_runtime_data = False
-            #     self.logger.critical('Cannot read runtime data.')
-            #     if self.acquisition_function == 'EIPS':
-            #         self.logger.critical('Reverting to acquisition function EI!')
-            #         self.acquisition_function = 'EI'
-
-            # for meta_dataset in meta_runs.index:
-            #     meta_dataset_start_index = meta_runs_index
-            #     for meta_configuration in meta_runs.columns:
-            #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
-            #             try:
-            #                 config = meta_base.get_configuration_from_algorithm_index(
-            #                     meta_configuration)
-            #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
-            #                 if read_runtime_data:
-            #                     runtime = meta_durations.loc[meta_dataset,
-            #                                                  meta_configuration]
-            #                 else:
-            #                     runtime = 1
-            #                 # TODO read out other status types!
-            #                 meta_runhistory.add(config, cost, runtime,
-            #                                     StatusType.SUCCESS,
-            #                                     instance_id=meta_dataset)
-            #                 meta_runs_index += 1
-            #             except:
-            #                 # TODO maybe add warning
-            #                 pass
-            #
-            #     meta_runs_dataset_indices[meta_dataset] = (
-            #         meta_dataset_start_index, meta_runs_index)
         else:
             meta_features = None
             self.logger.warning('Could not find meta-data directory %s' %
@@ -514,13 +478,13 @@ def run_smbo(self):
         startup_time = self.watcher.wall_elapsed(self.dataset_name)
         total_walltime_limit = self.total_walltime_limit - startup_time - 5
         scenario_dict = {'cs': self.config_space,
-                         'cutoff-time': self.func_eval_time_limit,
-                         'memory-limit': self.memory_limit,
-                         'wallclock-limit': total_walltime_limit,
+                         'cutoff_time': self.func_eval_time_limit,
+                         'memory_limit': self.memory_limit,
+                         'wallclock_limit': total_walltime_limit,
                          'output-dir':
                              self.backend.get_smac_output_directory(self.seed),
                          'shared-model': self.shared_mode,
-                         'run-obj': 'quality',
+                         'run_obj': 'quality',
                          'deterministic': 'true',
                          'instances': instances}
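
SMAC 0.6 spells these scenario options with underscores where 0.5 used hyphens; the hunk above tracks that rename (note that 'output-dir' and 'shared-model' keep their old spelling here). A minimal, self-contained sketch of the renamed keys, assuming smac>=0.6 and ConfigSpace<0.4 with the import paths below:

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.scenario.scenario import Scenario

config_space = ConfigurationSpace()
config_space.add_hyperparameter(UniformFloatHyperparameter('a', 0.0, 1.0))

scenario = Scenario({
    'cs': config_space,
    'cutoff_time': 60,        # was 'cutoff-time' with smac==0.5
    'memory_limit': 3072,     # was 'memory-limit'
    'wallclock_limit': 3600,  # was 'wallclock-limit'
    'run_obj': 'quality',     # was 'run-obj'
})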

@@ -631,31 +595,6 @@ def run_smbo(self):
             else:
                 raise ValueError(self.configuration_mode)
 
-            # Build a runtime model
-            # runtime_rf = RandomForestWithInstances(types,
-            #                                        instance_features=meta_features_list,
-            #                                        seed=1, num_trees=10)
-            # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
-            #                                      scenario=self.scenario,
-            #                                      success_states=None,
-            #                                      impute_censored_data=False,
-            #                                      impute_state=None)
-            # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
-            # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
-            # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
-            # # Transform Y_meta on a per-dataset base
-            # for meta_dataset in meta_runs_dataset_indices:
-            #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
-            #     end_index += 1  # Python indexing
-            #     Y_meta[start_index:end_index, 0]\
-            #         [Y_meta[start_index:end_index, 0] >2.0] = 2.0
-            #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
-            #     Y_meta[start_index:end_index, 0] = 1 - (
-            #         (1. - Y_meta[start_index:end_index, 0]) /
-            #         (1. - dataset_minimum))
-            #     Y_meta[start_index:end_index, 0]\
-            #         [Y_meta[start_index:end_index, 0] > 2] = 2
-
             smac.solver.stats.start_timing()
             # == first, evaluate all metelearning and default configurations
             smac.solver.incumbent = smac.solver.initial_design.run()
@@ -670,9 +609,10 @@ def run_smbo(self):
                     time_bound=self.total_walltime_limit)
 
             if smac.solver.scenario.shared_model:
-                pSMAC.write(run_history=smac.solver.runhistory,
-                            output_directory=smac.solver.scenario.output_dir,
-                            num_run=self.seed)
+                pSMAC.write(
+                    run_history=smac.solver.runhistory,
+                    output_directory=smac.solver.scenario.output_dir,
+                )
 
             if smac.solver.stats.is_budget_exhausted():
                 break
@@ -687,14 +627,7 @@ def run_smbo(self):
                     logger=self.logger)
 
             choose_next_start_time = time.time()
-            try:
-                challengers = self.choose_next(smac)
-            except Exception as e:
-                self.logger.error(e)
-                self.logger.error("Error in getting next configurations "
-                                  "with SMAC. Using random configuration!")
-                next_config = self.config_space.sample_configuration()
-                challengers = [next_config]
+            challengers = self.choose_next(smac)
             time_for_choose_next = time.time() - choose_next_start_time
             self.logger.info('Used %g seconds to find next '
                              'configurations' % (time_for_choose_next))
@@ -708,9 +641,10 @@ def run_smbo(self):
                     time_bound=time_for_choose_next)
 
             if smac.solver.scenario.shared_model:
-                pSMAC.write(run_history=smac.solver.runhistory,
-                            output_directory=smac.solver.scenario.output_dir,
-                            num_run=self.seed)
+                pSMAC.write(
+                    run_history=smac.solver.runhistory,
+                    output_directory=smac.solver.scenario.output_dir,
+                )
 
             if smac.solver.stats.is_budget_exhausted():
                 break
@@ -737,18 +671,8 @@ def choose_next(self, smac):
                 (1. - dataset_minimum))
         Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
 
-        # if len(X_meta) > 0 and len(X_cfg) > 0:
-        #     pass
-        #     X_cfg = np.concatenate((X_meta, X_cfg))
-        #     Y_cfg = np.concatenate((Y_meta, Y_cfg))
-        # elif len(X_meta) > 0:
-        #     X_cfg = X_meta.copy()
-        #     Y_cfg = Y_meta.copy()
-        # elif len(X_cfg) > 0:
         X_cfg = X_cfg.copy()
         Y_cfg = Y_cfg.copy()
-        # else:
-        #     raise ValueError('No training data for SMAC random forest!')
 
         self.logger.info('Using %d training points for SMAC.' %
                          X_cfg.shape[0])

ci_scripts/circle_install.sh (+2, -1)

@@ -6,6 +6,7 @@
 # here.
 source activate testenv
 
+export CC=`which gcc`
 # install documentation building dependencies
 pip install --upgrade numpy
 pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc

@@ -16,4 +17,4 @@ python setup.py clean
 python setup.py develop
 
 # pipefail is necessary to propagate exit codes
-set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt
+set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt

circle.yml (+2, -1)

@@ -26,10 +26,11 @@ dependencies:
     - sudo -E apt-get -yq remove texlive-binaries --purge
     - sudo -E apt-get -yq update
    - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra
+    # Other stuff...
+    - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential
     # Conda installation
     - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
     - bash ~/miniconda.sh -b -p $HOME/miniconda
-    - conda update --yes conda
     - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig
 
     # The --user is needed to let sphinx see the source and the binaries

doc/installation.rst (+1, -1)

@@ -58,7 +58,7 @@ the Section `Installing auto-sklearn`_.
 A common installation problem under recent Linux distribution is the
 incompability of the compiler version used to compile the Python binary
 shipped by AnaConda and the compiler installed by the distribution. This can
-be solved by istalling the *gcc* compiler shipped with AnaConda (as well as
+be solved by installing the *gcc* compiler shipped with AnaConda (as well as
 *swig*):
 
 .. code:: bash

requirements.txt (+2, -2)

@@ -18,5 +18,5 @@ pandas
 
 ConfigSpace>=0.3.3,<0.4
 pynisher>=0.4
-pyrfr>=0.4.0,<0.5
-smac==0.5.0
+pyrfr>=0.6.1,<0.7
+smac>=0.6.0,<0.7

scripts/run_auto-sklearn_for_metadata_generation.py (+6, -1)

@@ -94,7 +94,12 @@
 config = entry.incumbent
 
 logger = logging.getLogger('Testing:)')
-stats = Stats(Scenario({'cutoff_time': per_run_time_limit * 2}))
+stats = Stats(
+    Scenario({
+        'cutoff_time': per_run_time_limit * 2,
+        'run_obj': 'quality',
+    })
+)
 stats.start_timing()
 # To avoid the output "first run crashed"...
 stats.ta_runs += 1
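
The extra 'run_obj' entry is apparently needed because the Scenario constructor in smac 0.6 validates the run objective even for this offline use of Stats. A condensed sketch of the pattern, assuming smac>=0.6 and the import paths below:

from smac.scenario.scenario import Scenario
from smac.stats.stats import Stats

stats = Stats(Scenario({'cutoff_time': 120, 'run_obj': 'quality'}))
stats.start_timing()
stats.ta_runs += 1  # pre-count one run to suppress the "first run crashed" output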

setup.py (+3, -3)

@@ -26,9 +26,9 @@
     "liac-arff",
     "pandas",
     "ConfigSpace>=0.3.3,<0.4",
-    "pynisher>=0.4",
-    "pyrfr>=0.4,<0.5",
-    "smac==0.5.0"
+    "pynisher>=0.4,<0.5",
+    "pyrfr>=0.6.1,<0.7",
+    "smac>=0.6.0,<0.7"
 ]
 
 with open("autosklearn/__version__.py") as fh:
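
These pins follow the usual pattern of bounding each dependency below its next breaking release. A generic sketch of how such a list feeds setuptools; the package metadata here is a placeholder, not auto-sklearn's full setup.py:

from setuptools import setup, find_packages

install_requires = [
    "ConfigSpace>=0.3.3,<0.4",
    "pynisher>=0.4,<0.5",
    "pyrfr>=0.6.1,<0.7",
    "smac>=0.6.0,<0.7",
]

setup(
    name="example-package",  # placeholder name, not the real project
    version="0.1",
    packages=find_packages(),
    install_requires=install_requires,
)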

test/test_automl/test_smbo.py (+15, -9)

@@ -28,18 +28,24 @@ def test_choose_next(self):
                            total_walltime_limit=total_walltime_limit,
                            memory_limit=memory_limit,
                            watcher=None,
-                           metric=accuracy)
+                           metric=accuracy
+                           )
        auto.config_space = configspace
-        scenario = Scenario({'cs': configspace,
-                             'cutoff-time': func_eval_time_limit,
-                             'wallclock-limit': total_walltime_limit,
-                             'memory-limit': memory_limit,
-                             'run-obj': 'quality'})
+        scenario = Scenario({
+            'cs': configspace,
+            'cutoff_time': func_eval_time_limit,
+            'wallclock_limit': total_walltime_limit,
+            'memory_limit': memory_limit,
+            'run_obj': 'quality',
+        })
         smac = SMAC(scenario)
 
-        self.assertRaisesRegex(ValueError, 'Cannot use SMBO algorithm on '
-                               'empty runhistory',
-                               auto.choose_next, smac)
+        self.assertRaisesRegex(
+            ValueError,
+            'Cannot use SMBO algorithm on empty runhistory',
+            auto.choose_next,
+            smac
+        )
 
         config = Configuration(configspace, values={'a': 0.1, 'b': 0.2})
         # TODO make sure the incumbent is always set?

test/test_metalearning/pyMetaLearn/test_meta_features.py (+13, -7)

@@ -1,13 +1,15 @@
+import os
+import tempfile
 from six import StringIO
 from unittest import TestCase
 import unittest
-import os
 
 import arff
 import numpy as np
 import scipy.sparse
 from sklearn.preprocessing.imputation import Imputer
 from sklearn.datasets import make_multilabel_classification
+from sklearn.externals.joblib import Memory
 
 from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder
 from sklearn.preprocessing import StandardScaler

@@ -81,12 +83,16 @@ def tearDown(self):
         os.chdir(self.cwd)
 
     def get_multilabel(self):
-        return make_multilabel_classification(n_samples=100,
-                                              n_features=10,
-                                              n_classes=5,
-                                              n_labels=5,
-                                              return_indicator=True,
-                                              random_state=1)
+        cache = Memory(cachedir=tempfile.gettempdir())
+        cached_func = cache.cache(make_multilabel_classification)
+        return cached_func(
+            n_samples=100,
+            n_features=10,
+            n_classes=5,
+            n_labels=5,
+            return_indicator=True,
+            random_state=1
+        )
 
     def test_number_of_instance(self):
         mf = self.mf["NumberOfInstances"](self.X, self.y, self.categorical)
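
The rewritten get_multilabel memoizes the synthetic dataset on disk, so repeated test runs skip the generation step. The same pattern in isolation, assuming the joblib bundled with scikit-learn 0.18 (modern code would import joblib directly and pass location= instead of cachedir=):

import tempfile
from sklearn.externals.joblib import Memory

def expensive(n):
    return sum(range(n))

cache = Memory(cachedir=tempfile.gettempdir(), verbose=0)
cached_expensive = cache.cache(expensive)

first = cached_expensive(10 ** 7)   # computed and cached on disk
second = cached_expensive(10 ** 7)  # served from the cache
assert first == second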

test/test_pipeline/components/classification/test_liblinear.py (+1)

@@ -16,6 +16,7 @@ class LibLinearComponentTest(BaseClassificationComponentTest):
     res["default_iris_proba"] = 0.33728319465089696
     res["default_iris_sparse"] = 0.56
     res["default_digits"] = 0.91499696417729204
+    res['default_digits_places'] = 2
     res["default_digits_iterative"] = -1
     res["default_digits_binary"] = 0.98907103825136611
     res["default_digits_multilabel"] = 0.89539354612444322
