Commit 0ee3699

Merge pull request #357 from automl/development
Development
2 parents f4b72be + 03879f4 commit 0ee3699

16 files changed: +116, -148 lines

.travis.yml (-3)

@@ -55,14 +55,11 @@ before_install:
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
   - if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi
-  - conda update --yes conda
   - conda create -n testenv --yes python=$PYTHON_VERSION pip wheel nose
   - source activate testenv
   - conda install --yes gcc swig
   - echo "Using GCC at "`which gcc`
   - export CC=`which gcc`
-  # Fixes version `GLIBCXX_3.4.21' not found (on Ubuntu 16.04)
-  - conda install --yes libgcc
 
 install:
   # Install general requirements the way setup.py suggests

autosklearn/__init__.py (+3, -2)

@@ -6,10 +6,11 @@
 __MANDATORY_PACKAGES__ = '''
 numpy>=1.9
 scikit-learn>=0.18.1,<0.19
-smac==0.5.0
 lockfile>=0.10
+smac>=0.6.0,<0.7
+pyrfr>=0.6.1,<0.7
 ConfigSpace>=0.3.3,<0.4
-pyrfr>=0.4.0,<0.5
+pyrfr>=0.6.0,<0.7
 '''
 
 dependencies.verify_packages(__MANDATORY_PACKAGES__)
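
For context, a minimal sketch of what a verify_packages-style check over such a requirement string can look like. This is an illustration built on pkg_resources, not the actual `dependencies` module imported above:

# Illustration only -- not auto-sklearn's real dependency checker.
import pkg_resources

def verify_packages(requirements):
    for line in requirements.splitlines():
        line = line.strip()
        if not line:
            continue
        requirement = pkg_resources.Requirement.parse(line)
        try:
            installed = pkg_resources.get_distribution(requirement.project_name)
        except pkg_resources.DistributionNotFound:
            raise ImportError('Mandatory package %s not found.' % line)
        if installed.version not in requirement:
            raise ImportError(
                'Found %s==%s, which does not satisfy %s.'
                % (requirement.project_name, installed.version, line)
            )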

autosklearn/evaluation/__init__.py (+8, -2)

@@ -72,8 +72,14 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
 
         eval_function = functools.partial(fit_predict_try_except_decorator,
                                           ta=eval_function)
-        super().__init__(ta=eval_function, stats=stats, runhistory=runhistory,
-                         run_obj=run_obj, par_factor=par_factor)
+        super().__init__(
+            ta=eval_function,
+            stats=stats,
+            runhistory=runhistory,
+            run_obj=run_obj,
+            par_factor=par_factor,
+            cost_for_crash=WORST_POSSIBLE_RESULT,
+        )
 
         self.backend = backend
         self.autosklearn_seed = autosklearn_seed
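
The new cost_for_crash argument tells the SMAC 0.6 target-algorithm runner which cost to record for a crashed evaluation instead of aborting the optimization loop. A rough sketch of the idea, with all names here chosen for illustration rather than taken from SMAC's internals:

# Illustrative pseudologic, not SMAC's real runner: a crashed run is booked
# with the worst possible cost so the optimizer avoids that configuration
# rather than failing outright.
def run_target_algorithm(ta, config, cost_for_crash):
    try:
        return ta(config)  # returns a loss on success
    except Exception:
        return cost_for_crash  # e.g. WORST_POSSIBLE_RESULT above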

autosklearn/smbo.py (+13, -89)

@@ -450,42 +450,6 @@ def run_smbo(self):
                 (1, -1))
             self.logger.info(list(meta_features_dict.keys()))
 
-            # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
-            # meta_runs_index = 0
-            # try:
-            #     meta_durations = meta_base.get_all_runs('runtime')
-            #     read_runtime_data = True
-            # except KeyError:
-            #     read_runtime_data = False
-            #     self.logger.critical('Cannot read runtime data.')
-            #     if self.acquisition_function == 'EIPS':
-            #         self.logger.critical('Reverting to acquisition function EI!')
-            #         self.acquisition_function = 'EI'
-
-            # for meta_dataset in meta_runs.index:
-            #     meta_dataset_start_index = meta_runs_index
-            #     for meta_configuration in meta_runs.columns:
-            #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
-            #             try:
-            #                 config = meta_base.get_configuration_from_algorithm_index(
-            #                     meta_configuration)
-            #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
-            #                 if read_runtime_data:
-            #                     runtime = meta_durations.loc[meta_dataset,
-            #                                                  meta_configuration]
-            #                 else:
-            #                     runtime = 1
-            #                 # TODO read out other status types!
-            #                 meta_runhistory.add(config, cost, runtime,
-            #                                     StatusType.SUCCESS,
-            #                                     instance_id=meta_dataset)
-            #                 meta_runs_index += 1
-            #             except:
-            #                 # TODO maybe add warning
-            #                 pass
-            #
-            #     meta_runs_dataset_indices[meta_dataset] = (
-            #         meta_dataset_start_index, meta_runs_index)
         else:
             meta_features = None
             self.logger.warning('Could not find meta-data directory %s' %
@@ -514,13 +478,13 @@ def run_smbo(self):
         startup_time = self.watcher.wall_elapsed(self.dataset_name)
         total_walltime_limit = self.total_walltime_limit - startup_time - 5
         scenario_dict = {'cs': self.config_space,
-                         'cutoff-time': self.func_eval_time_limit,
-                         'memory-limit': self.memory_limit,
-                         'wallclock-limit': total_walltime_limit,
+                         'cutoff_time': self.func_eval_time_limit,
+                         'memory_limit': self.memory_limit,
+                         'wallclock_limit': total_walltime_limit,
                          'output-dir':
                              self.backend.get_smac_output_directory(self.seed),
                          'shared-model': self.shared_mode,
-                         'run-obj': 'quality',
+                         'run_obj': 'quality',
                          'deterministic': 'true',
                          'instances': instances}
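
SMAC 0.6 spells these scenario options with underscores where 0.5 used hyphens; the hunk above tracks that rename (note that 'output-dir' and 'shared-model' keep their old spelling here). A minimal, self-contained sketch of the renamed keys, assuming smac>=0.6 and ConfigSpace<0.4 with the import paths below:

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.scenario.scenario import Scenario

config_space = ConfigurationSpace()
config_space.add_hyperparameter(UniformFloatHyperparameter('a', 0.0, 1.0))

scenario = Scenario({
    'cs': config_space,
    'cutoff_time': 60,        # was 'cutoff-time' with smac==0.5
    'memory_limit': 3072,     # was 'memory-limit'
    'wallclock_limit': 3600,  # was 'wallclock-limit'
    'run_obj': 'quality',     # was 'run-obj'
})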

@@ -631,31 +595,6 @@ def run_smbo(self):
             else:
                 raise ValueError(self.configuration_mode)
 
-            # Build a runtime model
-            # runtime_rf = RandomForestWithInstances(types,
-            #                                        instance_features=meta_features_list,
-            #                                        seed=1, num_trees=10)
-            # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
-            #                                      scenario=self.scenario,
-            #                                      success_states=None,
-            #                                      impute_censored_data=False,
-            #                                      impute_state=None)
-            # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
-            # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
-            # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
-            # # Transform Y_meta on a per-dataset base
-            # for meta_dataset in meta_runs_dataset_indices:
-            #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
-            #     end_index += 1  # Python indexing
-            #     Y_meta[start_index:end_index, 0]\
-            #         [Y_meta[start_index:end_index, 0] >2.0] = 2.0
-            #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
-            #     Y_meta[start_index:end_index, 0] = 1 - (
-            #         (1. - Y_meta[start_index:end_index, 0]) /
-            #         (1. - dataset_minimum))
-            #     Y_meta[start_index:end_index, 0]\
-            #         [Y_meta[start_index:end_index, 0] > 2] = 2
-
             smac.solver.stats.start_timing()
             # == first, evaluate all metelearning and default configurations
             smac.solver.incumbent = smac.solver.initial_design.run()
@@ -670,9 +609,10 @@ def run_smbo(self):
                     time_bound=self.total_walltime_limit)
 
             if smac.solver.scenario.shared_model:
-                pSMAC.write(run_history=smac.solver.runhistory,
-                            output_directory=smac.solver.scenario.output_dir,
-                            num_run=self.seed)
+                pSMAC.write(
+                    run_history=smac.solver.runhistory,
+                    output_directory=smac.solver.scenario.output_dir,
+                )
 
             if smac.solver.stats.is_budget_exhausted():
                 break
@@ -687,14 +627,7 @@ def run_smbo(self):
                     logger=self.logger)
 
             choose_next_start_time = time.time()
-            try:
-                challengers = self.choose_next(smac)
-            except Exception as e:
-                self.logger.error(e)
-                self.logger.error("Error in getting next configurations "
-                                  "with SMAC. Using random configuration!")
-                next_config = self.config_space.sample_configuration()
-                challengers = [next_config]
+            challengers = self.choose_next(smac)
             time_for_choose_next = time.time() - choose_next_start_time
             self.logger.info('Used %g seconds to find next '
                              'configurations' % (time_for_choose_next))
@@ -708,9 +641,10 @@ def run_smbo(self):
                     time_bound=time_for_choose_next)
 
             if smac.solver.scenario.shared_model:
-                pSMAC.write(run_history=smac.solver.runhistory,
-                            output_directory=smac.solver.scenario.output_dir,
-                            num_run=self.seed)
+                pSMAC.write(
+                    run_history=smac.solver.runhistory,
+                    output_directory=smac.solver.scenario.output_dir,
+                )
 
             if smac.solver.stats.is_budget_exhausted():
                 break
@@ -737,18 +671,8 @@ def choose_next(self, smac):
                 (1. - dataset_minimum))
         Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
 
-        # if len(X_meta) > 0 and len(X_cfg) > 0:
-        #     pass
-        #     X_cfg = np.concatenate((X_meta, X_cfg))
-        #     Y_cfg = np.concatenate((Y_meta, Y_cfg))
-        # elif len(X_meta) > 0:
-        #     X_cfg = X_meta.copy()
-        #     Y_cfg = Y_meta.copy()
-        # elif len(X_cfg) > 0:
         X_cfg = X_cfg.copy()
         Y_cfg = Y_cfg.copy()
-        # else:
-        #     raise ValueError('No training data for SMAC random forest!')
 
         self.logger.info('Using %d training points for SMAC.' %
                          X_cfg.shape[0])

ci_scripts/circle_install.sh (+2, -1)

@@ -6,6 +6,7 @@
 # here.
 source activate testenv
 
+export CC=`which gcc`
 # install documentation building dependencies
 pip install --upgrade numpy
 pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc

@@ -16,4 +17,4 @@ python setup.py clean
 python setup.py develop
 
 # pipefail is necessary to propagate exit codes
-set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt
+set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt

circle.yml (+2, -1)

@@ -26,10 +26,11 @@ dependencies:
     - sudo -E apt-get -yq remove texlive-binaries --purge
     - sudo -E apt-get -yq update
    - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra
+    # Other stuff...
+    - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential
     # Conda installation
     - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
     - bash ~/miniconda.sh -b -p $HOME/miniconda
-    - conda update --yes conda
     - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig
 
     # The --user is needed to let sphinx see the source and the binaries

doc/installation.rst (+1, -1)

@@ -58,7 +58,7 @@ the Section `Installing auto-sklearn`_.
 A common installation problem under recent Linux distribution is the
 incompability of the compiler version used to compile the Python binary
 shipped by AnaConda and the compiler installed by the distribution. This can
-be solved by istalling the *gcc* compiler shipped with AnaConda (as well as
+be solved by installing the *gcc* compiler shipped with AnaConda (as well as
 *swig*):
 
 .. code:: bash

requirements.txt (+2, -2)

@@ -18,5 +18,5 @@ pandas
 
 ConfigSpace>=0.3.3,<0.4
 pynisher>=0.4
-pyrfr>=0.4.0,<0.5
-smac==0.5.0
+pyrfr>=0.6.1,<0.7
+smac>=0.6.0,<0.7

scripts/run_auto-sklearn_for_metadata_generation.py (+6, -1)

@@ -94,7 +94,12 @@
 config = entry.incumbent
 
 logger = logging.getLogger('Testing:)')
-stats = Stats(Scenario({'cutoff_time': per_run_time_limit * 2}))
+stats = Stats(
+    Scenario({
+        'cutoff_time': per_run_time_limit * 2,
+        'run_obj': 'quality',
+    })
+)
 stats.start_timing()
 # To avoid the output "first run crashed"...
 stats.ta_runs += 1
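
The extra 'run_obj' entry is apparently needed because the Scenario constructor in smac 0.6 validates the run objective even for this offline use of Stats. A condensed sketch of the pattern, assuming smac>=0.6 and the import paths below:

from smac.scenario.scenario import Scenario
from smac.stats.stats import Stats

stats = Stats(Scenario({'cutoff_time': 120, 'run_obj': 'quality'}))
stats.start_timing()
stats.ta_runs += 1  # pre-count one run to suppress the "first run crashed" output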

setup.py (+3, -3)

@@ -26,9 +26,9 @@
     "liac-arff",
     "pandas",
     "ConfigSpace>=0.3.3,<0.4",
-    "pynisher>=0.4",
-    "pyrfr>=0.4,<0.5",
-    "smac==0.5.0"
+    "pynisher>=0.4,<0.5",
+    "pyrfr>=0.6.1,<0.7",
+    "smac>=0.6.0,<0.7"
 ]
 
 with open("autosklearn/__version__.py") as fh:
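
These pins follow the usual pattern of bounding each dependency below its next breaking release. A generic sketch of how such a list feeds setuptools; the package metadata here is a placeholder, not auto-sklearn's full setup.py:

from setuptools import setup, find_packages

install_requires = [
    "ConfigSpace>=0.3.3,<0.4",
    "pynisher>=0.4,<0.5",
    "pyrfr>=0.6.1,<0.7",
    "smac>=0.6.0,<0.7",
]

setup(
    name="example-package",  # placeholder name, not the real project
    version="0.1",
    packages=find_packages(),
    install_requires=install_requires,
)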

test/test_automl/test_smbo.py (+15, -9)

@@ -28,18 +28,24 @@ def test_choose_next(self):
                            total_walltime_limit=total_walltime_limit,
                            memory_limit=memory_limit,
                            watcher=None,
-                           metric=accuracy)
+                           metric=accuracy
+                           )
        auto.config_space = configspace
-        scenario = Scenario({'cs': configspace,
-                             'cutoff-time': func_eval_time_limit,
-                             'wallclock-limit': total_walltime_limit,
-                             'memory-limit': memory_limit,
-                             'run-obj': 'quality'})
+        scenario = Scenario({
+            'cs': configspace,
+            'cutoff_time': func_eval_time_limit,
+            'wallclock_limit': total_walltime_limit,
+            'memory_limit': memory_limit,
+            'run_obj': 'quality',
+        })
         smac = SMAC(scenario)
 
-        self.assertRaisesRegex(ValueError, 'Cannot use SMBO algorithm on '
-                               'empty runhistory',
-                               auto.choose_next, smac)
+        self.assertRaisesRegex(
+            ValueError,
+            'Cannot use SMBO algorithm on empty runhistory',
+            auto.choose_next,
+            smac
+        )
 
         config = Configuration(configspace, values={'a': 0.1, 'b': 0.2})
         # TODO make sure the incumbent is always set?

test/test_metalearning/pyMetaLearn/test_meta_features.py (+13, -7)

@@ -1,13 +1,15 @@
+import os
+import tempfile
 from six import StringIO
 from unittest import TestCase
 import unittest
-import os
 
 import arff
 import numpy as np
 import scipy.sparse
 from sklearn.preprocessing.imputation import Imputer
 from sklearn.datasets import make_multilabel_classification
+from sklearn.externals.joblib import Memory
 
 from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder
 from sklearn.preprocessing import StandardScaler

@@ -81,12 +83,16 @@ def tearDown(self):
         os.chdir(self.cwd)
 
     def get_multilabel(self):
-        return make_multilabel_classification(n_samples=100,
-                                              n_features=10,
-                                              n_classes=5,
-                                              n_labels=5,
-                                              return_indicator=True,
-                                              random_state=1)
+        cache = Memory(cachedir=tempfile.gettempdir())
+        cached_func = cache.cache(make_multilabel_classification)
+        return cached_func(
+            n_samples=100,
+            n_features=10,
+            n_classes=5,
+            n_labels=5,
+            return_indicator=True,
+            random_state=1
+        )
 
     def test_number_of_instance(self):
         mf = self.mf["NumberOfInstances"](self.X, self.y, self.categorical)
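
The rewritten get_multilabel memoizes the synthetic dataset on disk, so repeated test runs skip the generation step. The same pattern in isolation, assuming the joblib bundled with scikit-learn 0.18 (modern code would import joblib directly and pass location= instead of cachedir=):

import tempfile
from sklearn.externals.joblib import Memory

def expensive(n):
    return sum(range(n))

cache = Memory(cachedir=tempfile.gettempdir(), verbose=0)
cached_expensive = cache.cache(expensive)

first = cached_expensive(10 ** 7)   # computed and cached on disk
second = cached_expensive(10 ** 7)  # served from the cache
assert first == second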

test/test_pipeline/components/classification/test_liblinear.py (+1)

@@ -16,6 +16,7 @@ class LibLinearComponentTest(BaseClassificationComponentTest):
     res["default_iris_proba"] = 0.33728319465089696
     res["default_iris_sparse"] = 0.56
     res["default_digits"] = 0.91499696417729204
+    res['default_digits_places'] = 2
     res["default_digits_iterative"] = -1
     res["default_digits_binary"] = 0.98907103825136611
     res["default_digits_multilabel"] = 0.89539354612444322
